diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index e6b35735f9..2332546010 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -15,10 +15,10 @@ List issues that this PR closes ([syntax](https://docs.github.com/en/issues/trac # Before your PR is "Ready for review" **Pre checks**: -- [ ] Make sure you read and followed [Contributor guidelines](/NVIDIA/reinforcer/blob/main/CONTRIBUTING.md) +- [ ] Make sure you read and followed [Contributor guidelines](/NVIDIA/nemo-rl/blob/main/CONTRIBUTING.md) - [ ] Did you write any new necessary tests? -- [ ] Did you run the unit tests and functional tests locally? Visit our [Testing Guide](/NVIDIA/reinforcer/blob/main/docs/testing.md) for how to run tests -- [ ] Did you add or update any necessary documentation? Visit our [Document Development Guide](/NVIDIA/reinforcer/blob/main/docs/documentation.md) for how to write, build and test the docs. +- [ ] Did you run the unit tests and functional tests locally? Visit our [Testing Guide](/NVIDIA/nemo-rl/blob/main/docs/testing.md) for how to run tests +- [ ] Did you add or update any necessary documentation? Visit our [Document Development Guide](/NVIDIA/nemo-rl/blob/main/docs/documentation.md) for how to write, build and test the docs. # Additional Information * ... 
diff --git a/.github/workflows/_run_test.yml b/.github/workflows/_run_test.yml index 736b61222c..5fa71f4215 100644 --- a/.github/workflows/_run_test.yml +++ b/.github/workflows/_run_test.yml @@ -68,7 +68,7 @@ jobs: - name: Docker pull image run: | - docker pull nemoci.azurecr.io/nemo_reinforcer_container:${{ github.run_id }} + docker pull nemoci.azurecr.io/nemo_rl_container:${{ github.run_id }} - name: Checkout repository uses: actions/checkout@v4 @@ -80,22 +80,22 @@ jobs: docker run --rm -u root -d --name nemo_container_${{ github.run_id }} --runtime=nvidia --gpus all --shm-size=64g \ --env TRANSFORMERS_OFFLINE=0 \ --env HYDRA_FULL_ERROR=1 \ - --env HF_HOME=/home/TestData/reinforcer/hf_home \ - --env HF_DATASETS_CACHE=/home/TestData/reinforcer/hf_datasets_cache \ - --env REINFORCER_REPO_DIR=/opt/reinforcer \ + --env HF_HOME=/home/TestData/nemo-rl/hf_home \ + --env HF_DATASETS_CACHE=/home/TestData/nemo-rl/hf_datasets_cache \ + --env NEMO_RL_REPO_DIR=/opt/nemo-rl \ --env HF_TOKEN \ - --volume $GITHUB_WORKSPACE:/opt/reinforcer \ + --volume $GITHUB_WORKSPACE:/opt/nemo-rl \ --volume $GITHUB_ACTION_DIR:$GITHUB_ACTION_DIR \ - --volume /mnt/datadrive/TestData/reinforcer/datasets:/opt/reinforcer/datasets:ro \ - --volume /mnt/datadrive/TestData/reinforcer/checkpoints:/home/TestData/reinforcer/checkpoints:ro \ - --volume /mnt/datadrive/TestData/reinforcer/hf_home/hub:/home/TestData/reinforcer/hf_home/hub \ - --volume /mnt/datadrive/TestData/reinforcer/hf_datasets_cache:/home/TestData/reinforcer/hf_datasets_cache \ - nemoci.azurecr.io/nemo_reinforcer_container:${{ github.run_id }} \ + --volume /mnt/datadrive/TestData/nemo-rl/datasets:/opt/nemo-rl/datasets:ro \ + --volume /mnt/datadrive/TestData/nemo-rl/checkpoints:/home/TestData/nemo-rl/checkpoints:ro \ + --volume /mnt/datadrive/TestData/nemo-rl/hf_home/hub:/home/TestData/nemo-rl/hf_home/hub \ + --volume /mnt/datadrive/TestData/nemo-rl/hf_datasets_cache:/home/TestData/nemo-rl/hf_datasets_cache \ + 
nemoci.azurecr.io/nemo_rl_container:${{ github.run_id }} \ bash -c "sleep $(( ${{ inputs.TIMEOUT }} * 60 + 60 ))" - name: Run unit tests run: | - docker exec nemo_container_${{ github.run_id }} git config --global --add safe.directory /opt/reinforcer + docker exec nemo_container_${{ github.run_id }} git config --global --add safe.directory /opt/nemo-rl docker exec nemo_container_${{ github.run_id }} bash -eux -o pipefail -c " # This is needed since we create virtualenvs in the workspace, so this allows it to be cleaned up if necessary umask 000 @@ -141,6 +141,6 @@ jobs: if: always() run: | # Ensure any added files in the mounted directory are owned by the runner user to allow it to clean up - docker exec nemo_container_${{ github.run_id }} bash -c "find /opt/reinforcer -path '/opt/reinforcer/datasets' -prune -o -exec chown $(id -u):$(id -g) {} +" + docker exec nemo_container_${{ github.run_id }} bash -c "find /opt/nemo-rl -path '/opt/nemo-rl/datasets' -prune -o -exec chown $(id -u):$(id -g) {} +" docker container stop nemo_container_${{ github.run_id }} || true docker container rm nemo_container_${{ github.run_id }} || true diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 876c43021a..26df7e2093 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-name: "CICD Reinforcer" +name: "CICD NeMo RL" on: pull_request: @@ -136,12 +136,12 @@ jobs: uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_build_container.yml@v0.22.7 with: build-ref: ${{ github.sha }} - image-name: nemo_reinforcer_container + image-name: nemo_rl_container dockerfile: docker/Dockerfile - image-label: nemo-reinforcer + image-label: nemo-rl build-args: | MAX_JOBS=32 - REINFORCER_COMMIT=${{ github.sha }} + NEMO_RL_COMMIT=${{ github.sha }} tests: name: Tests @@ -152,21 +152,21 @@ jobs: RUNNER: self-hosted-azure TIMEOUT: 60 UNIT_TEST_SCRIPT: | - cd /opt/reinforcer + cd /opt/nemo-rl if [[ "${{ needs.pre-flight.outputs.test_level }}" =~ ^(L0|L1|L2)$ ]]; then uv run --no-sync bash -x ./tests/run_unit.sh else echo Skipping unit tests for docs-only level fi DOC_TEST_SCRIPT: | - cd /opt/reinforcer/docs + cd /opt/nemo-rl/docs if [[ "${{ needs.pre-flight.outputs.test_level }}" =~ ^(docs|L0|L1|L2)$ ]]; then uv run --no-sync sphinx-build -b doctest . _build/doctest else echo Skipping doc tests for level ${{ needs.pre-flight.outputs.test_level }} fi FUNCTIONAL_TEST_SCRIPT: | - cd /opt/reinforcer + cd /opt/nemo-rl if [[ "${{ needs.pre-flight.outputs.test_level }}" =~ ^(L1|L2)$ ]]; then uv run --no-sync bash ./tests/functional/sft.sh uv run --no-sync bash ./tests/functional/grpo.sh @@ -177,7 +177,7 @@ jobs: fi # TODO: enable once we have convergence tests in CI #CONVERGENCE_TEST_SCRIPT: | - # cd /opt/reinforcer + # cd /opt/nemo-rl # if [[ "${{ needs.pre-flight.outputs.test_level }}" =~ ^(L2)$ ]]; then # echo "Running convergence tests" # # Add your convergence test commands here @@ -186,7 +186,7 @@ jobs: # echo "Skipping convergence tests for level ${{ needs.pre-flight.outputs.test_level }}" # fi AFTER_SCRIPT: | - cd /opt/reinforcer + cd /opt/nemo-rl cat <100B Parameters, scaling from 1 GPU to 100s +# Nemo-RL: A Scalable and Efficient Post-Training Library for Models Ranging from tiny to >100B Parameters, scaling from 1 GPU to 100s -- [Nemo-Reinforcer: A 
Scalable and Efficient Post-Training Library for Models Ranging from tiny to \>100B Parameters, scaling from 1 GPU to 100s](#nemo-reinforcer-a-scalable-and-efficient-post-training-library-for-models-ranging-from-tiny-to-100b-parameters-scaling-from-1-gpu-to-100s) +- [Nemo-RL: A Scalable and Efficient Post-Training Library for Models Ranging from tiny to \>100B Parameters, scaling from 1 GPU to 100s](#nemo-rl-a-scalable-and-efficient-post-training-library-for-models-ranging-from-tiny-to-100b-parameters-scaling-from-1-gpu-to-100s) - [Features](#features) - [Prerequisuites](#prerequisuites) - [Quick start](#quick-start) @@ -17,7 +17,7 @@ - [Multi-node](#multi-node-2) - [Cluster Start](#cluster-start) -**Nemo-Reinforcer** is a scalable and efficient post-training library designed for models ranging from 1 GPU to thousands, and from tiny to over 100 billion parameters. +**Nemo-RL** is a scalable and efficient post-training library designed for models ranging from 1 GPU to thousands, and from tiny to over 100 billion parameters. 
What you can expect: @@ -52,8 +52,8 @@ What you can expect: Clone **NeMo RL** ```sh -git clone git@github.com:NVIDIA/reinforcer.git -cd reinforcer +git clone git@github.com:NVIDIA/nemo-rl.git +cd nemo-rl ``` Install `uv` @@ -111,7 +111,7 @@ uv run python examples/run_grpo_math.py \ #### Multi-node ```sh -# Run from the root of NeMo-Reinforcer repo +# Run from the root of NeMo-RL repo NUM_ACTOR_NODES=2 # grpo_math_8b uses Llama-3.1-8B-Instruct model @@ -131,7 +131,7 @@ sbatch \ ##### GRPO Qwen2.5-32B ```sh -# Run from the root of NeMo-Reinforcer repo +# Run from the root of NeMo-RL repo NUM_ACTOR_NODES=16 # Download Qwen before the job starts to avoid spending time downloading during the training loop @@ -187,7 +187,7 @@ Refer to `examples/configs/sft.yaml` for a full list of parameters that can be o #### Multi-node ```sh -# Run from the root of NeMo-Reinforcer repo +# Run from the root of NeMo-RL repo NUM_ACTOR_NODES=2 COMMAND="uv run ./examples/run_sft.py --config examples/configs/sft.yaml cluster.num_nodes=2 cluster.gpus_per_node=8 checkpointing.checkpoint_dir='results/sft_llama8b_2nodes' logger.wandb_enabled=True logger.wandb.name='sft-llama8b'" \ @@ -244,7 +244,7 @@ Refer to [dpo.yaml](examples/configs/dpo.yaml) for a full list of parameters tha For distributed DPO training across multiple nodes, modify the following script for your use case: ```sh -# Run from the root of NeMo-Reinforcer repo +# Run from the root of NeMo-RL repo ## number of nodes to use for your job NUM_ACTOR_NODES=2 diff --git a/docker/Dockerfile b/docker/Dockerfile index b7d39c841f..b1977a4ac9 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -22,12 +22,12 @@ WORKDIR /opt/reinforcer # First copy only the dependency files COPY --chown=ray --chmod=755 pyproject.toml uv.lock ./ -ENV UV_PROJECT_ENVIRONMENT=/opt/reinforcer_venv -ENV VIRTUAL_ENV=/opt/reinforcer_venv +ENV UV_PROJECT_ENVIRONMENT=/opt/nemo_rl_venv +ENV VIRTUAL_ENV=/opt/nemo_rl_venv # Create and activate virtual environment 
RUN <<"EOF" -uv venv /opt/reinforcer_venv +uv venv /opt/nemo_rl_venv # uv sync has a more reliable resolver than simple uv pip install which can fail # Sync each training + inference backend one at a time (since they may conflict) @@ -38,7 +38,7 @@ uv sync --locked --extra vllm --no-install-project uv sync --locked --all-groups --no-install-project EOF -ENV PATH="/opt/reinforcer_venv/bin:$PATH" +ENV PATH="/opt/nemo_rl_venv/bin:$PATH" # The ray images automatically activate the anaconda venv. We will # comment this out of the .bashrc to give the same UX between docker diff --git a/docs/adding-new-models.md b/docs/adding-new-models.md index 673cc602bf..9afcb46cf9 100644 --- a/docs/adding-new-models.md +++ b/docs/adding-new-models.md @@ -1,6 +1,6 @@ # Adding New Models -This guide outlines how to integrate and validate a new model within **NeMo-Reinforcer**. Each new model must pass a standard set of compatibility tests before being considered ready to be used in RL pipelines. +This guide outlines how to integrate and validate a new model within **NeMo-RL**. Each new model must pass a standard set of compatibility tests before being considered ready to be used in RL pipelines. ## Importance of Log Probability Consistency in Training and Inference @@ -120,4 +120,4 @@ When validating your model, you should analyze the results across different conf --- -By following these validation steps and ensuring your model's outputs remain consistent across backends, you can confirm that your new model meets **NeMo-Reinforcer**'s requirements. \ No newline at end of file +By following these validation steps and ensuring your model's outputs remain consistent across backends, you can confirm that your new model meets **NeMo-RL**'s requirements. 
\ No newline at end of file diff --git a/docs/cluster.md b/docs/cluster.md index 8a73288e09..260acaeb1e 100644 --- a/docs/cluster.md +++ b/docs/cluster.md @@ -12,7 +12,7 @@ ### Batched Job Submission ```sh -# Run from the root of NeMo-Reinforcer repo +# Run from the root of NeMo-RL repo NUM_ACTOR_NODES=1 # Total nodes requested (head is colocated on ray-worker-0) COMMAND="uv run ./examples/run_grpo_math.py" \ @@ -43,12 +43,12 @@ tail -f 1980204-logs/ray-driver.log ### Interactive Launching :::{tip} -A key advantage of running interactively on the head node is the ability to execute multiple multi-node jobs without needing to requeue in the SLURM job queue. This means during debugging sessions, you can avoid submitting a new `sbatch` command each time and instead debug and re-submit your Reinforcer job directly from the interactive session. +A key advantage of running interactively on the head node is the ability to execute multiple multi-node jobs without needing to requeue in the SLURM job queue. This means during debugging sessions, you can avoid submitting a new `sbatch` command each time and instead debug and re-submit your NeMo-RL job directly from the interactive session. 
::: To run interactively, launch the same command as the [Batched Job Submission](#batched-job-submission) except omit the `COMMAND` line: ```sh -# Run from the root of NeMo-Reinforcer repo +# Run from the root of NeMo-RL repo NUM_ACTOR_NODES=1 # Total nodes requested (head is colocated on ray-worker-0) CONTAINER=YOUR_CONTAINER \ diff --git a/docs/conf.py b/docs/conf.py index c9f61d4faf..af0ac8d888 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -23,7 +23,7 @@ import os import sys -project = "NeMo-Reinforcer" +project = "NeMo-RL" copyright = "2025, NVIDIA Corporation" author = "NVIDIA Corporation" release = "0.0.1" @@ -59,7 +59,7 @@ sys.path.insert(0, os.path.abspath("..")) autodoc2_packages = [ - "../nemo_reinforcer", # Path to your package relative to conf.py + "../nemo_rl", # Path to your package relative to conf.py ] autodoc2_render_plugin = "myst" # Use MyST for rendering docstrings autodoc2_output_dir = "apidocs" # Output directory for autodoc2 (relative to docs/) diff --git a/docs/design-docs/checkpointing.md b/docs/design-docs/checkpointing.md index 9b9a6f6826..63e6c2e0c9 100644 --- a/docs/design-docs/checkpointing.md +++ b/docs/design-docs/checkpointing.md @@ -1,9 +1,9 @@ # Checkpointing with HuggingFace Models ## Checkpoint Format -Reinforcer provides two checkpoint formats for HuggingFace models: Torch distributed and HuggingFace format. Torch distributed is used by default for efficiency, and HuggingFace format is provided for compatibility with HuggingFace's `AutoModel.from_pretrained` API. Note that HuggingFace format checkpoints save only the model weights, ignoring the optimizer states. It is recommended to use Torch distributed format to save intermediate checkpoints and to save a HuggingFace checkpoint only at the end of training. +NeMo-RL provides two checkpoint formats for HuggingFace models: Torch distributed and HuggingFace format. 
Torch distributed is used by default for efficiency, and HuggingFace format is provided for compatibility with HuggingFace's `AutoModel.from_pretrained` API. Note that HuggingFace format checkpoints save only the model weights, ignoring the optimizer states. It is recommended to use Torch distributed format to save intermediate checkpoints and to save a HuggingFace checkpoint only at the end of training. -There are two ways to get a Reinforcer checkpoint in HuggingFace format. +There are two ways to get a NeMo-RL checkpoint in HuggingFace format. 1. (Recommended) Save the HuggingFace checkpoint directly by passing `save_hf=True` to `HFPolicy`'s `save_checkpoint`: diff --git a/docs/design-docs/design-and-philosophy.md b/docs/design-docs/design-and-philosophy.md index e9fead87e8..00d6284b3b 100644 --- a/docs/design-docs/design-and-philosophy.md +++ b/docs/design-docs/design-and-philosophy.md @@ -1,5 +1,5 @@ # Design and Philosophy -In this section, we will describe the problems this library aims to solve and motivate/dicuss the Reinforcer APIs. +In this section, we will describe the problems this library aims to solve and motivate/discuss the NeMo-RL APIs. 
## Motivation Online RL requires coordinating a lot of different pieces of software/models @@ -19,8 +19,8 @@ Fundamentally, we need to be able to do 4 things between these RL Actors: ## Design We create composable and hackable abstractions for each layer of the tasks above -- Resourcing -> {py:class}`RayVirtualCluster ` -- Isolation -> {py:class}`RayWorkerGroup ` +- Resourcing -> {py:class}`RayVirtualCluster ` +- Isolation -> {py:class}`RayWorkerGroup ` - Coordination -> A Single-Process Controller using Ray - Communication -> Data flows through one of the following: - the single controller @@ -32,7 +32,7 @@ By creating a common interface for these 4 tasks, **RL algorithm code looks the ![actor-wg-worker-vc](../assets/actor-wg-worker-vc.png) -### {py:class}`RayVirtualCluster ` +### {py:class}`RayVirtualCluster ` VirtualCluster provides a basic abstraction on top of Ray Placement Groups that allow you to section off a part of your compute resources for WorkerGroups to run on as though they had their own cluster. They support running just one WorkerGroup on each VirtualCluster, or *colocation*, where multiple WorkerGroups share resources (i.e running policy training(hf) and generation(vllm) on the same GPUs in-turn). Minimally, it has has the following core API: @@ -68,7 +68,7 @@ class RayVirtualCluster: """ ``` -### {py:class}`RayWorkerGroup ` +### {py:class}`RayWorkerGroup ` All work is done by "Worker Processes"(Ray Actors) that run on a small unit of resources (usually 1 CPU or 1 CPU+GPU). 
These workers are managed by *RayWorkerGroup* ```python class RayWorkerGroup: @@ -109,4 +109,4 @@ def grpo_train( training_data = calculate_grpo_trainnig_data(generations, logprobs, reference_logprobs, rewards) policy.train(generations, logprobs, reference_logprobs, GRPOLossFn) ``` -For a real implementation of grpo (with valiation, checkpointing, memory movement, and the omitted data processing steps), see [grpo_train](../../nemo_reinforcer/algorithms/grpo.py) +For a real implementation of grpo (with validation, checkpointing, memory movement, and the omitted data processing steps), see [grpo_train](../../nemo_rl/algorithms/grpo.py) diff --git a/docs/design-docs/generation.md b/docs/design-docs/generation.md index b519b2d249..72c2554d92 100644 --- a/docs/design-docs/generation.md +++ b/docs/design-docs/generation.md @@ -1,6 +1,6 @@ # Generation Module -This doc explains the token generation interface and various backends for the NeMo Reinforcer framework. The generation system is designed with a unified interface that allows different backends (like VLLM, HuggingFace, SGLang, TRT-LLM) to provide token generation capabilities while adhering to the same API. +This doc explains the token generation interface and various backends for the NeMo-RL framework. The generation system is designed with a unified interface that allows different backends (like VLLM, HuggingFace, SGLang, TRT-LLM) to provide token generation capabilities while adhering to the same API. ## Generation Interface @@ -62,11 +62,11 @@ A key thing to note about generation backends is that the generation backend tak ## VLLM Backend -The VLLM backend (`models/generation/vllm.py`) implements the {py:class}`GenerationInterface ` to provide efficient text generation using the VLLM library, which is optimized for large language models. 
+The VLLM backend (`models/generation/vllm.py`) implements the {py:class}`GenerationInterface ` to provide efficient text generation using the VLLM library, which is optimized for large language models. ### VllmGeneration Class -The {py:class}`VllmGeneration ` class is the main implementation of the {py:class}`GenerationInterface ` for VLLM. It: +The {py:class}`VllmGeneration ` class is the main implementation of the {py:class}`GenerationInterface ` for VLLM. It: 1. Sets up VLLM workers in a distributed environment using Ray 2. Manages the lifecycle of these workers (initialization, generation, shutdown) @@ -75,7 +75,7 @@ The {py:class}`VllmGeneration ` is a Ray actor that: +The {py:class}`VllmGenerationWorker ` is a Ray actor that: 1. Initializes and manages a VLLM model instance 2. Performs the actual generation on a GPU @@ -84,7 +84,7 @@ The {py:class}`VllmGenerationWorker ` class in `vllm_backend.py` extends the VLLM worker with additional capabilities: +The {py:class}`UpdatableVllmInternalWorker ` class in `vllm_backend.py` extends the VLLM worker with additional capabilities: 1. Reporting device IDs to allow mapping of workers to specific GPUs 2. Updating weights from IPC handles for efficient weight sharing @@ -95,11 +95,11 @@ The {py:class}`UpdatableVllmInternalWorker ` -2. Implement the required methods: {py:meth}`generate `, {py:meth}`prepare_for_generation `, and {py:meth}`finish_generation ` -3. Ensure your implementation works with the standard {py:class}`GenerationConfig ` and {py:class}`GenerationDatumSpec ` structures +1. Create a new class that implements {py:class}`GenerationInterface ` +2. Implement the required methods: {py:meth}`generate `, {py:meth}`prepare_for_generation `, and {py:meth}`finish_generation ` +3. Ensure your implementation works with the standard {py:class}`GenerationConfig ` and {py:class}`GenerationDatumSpec ` structures 4. 
Register your backend with the system (if needed) to make it accessible This modular design allows for easy extension with new backends while maintaining a consistent interface for the rest of the system. diff --git a/docs/design-docs/logger.md b/docs/design-docs/logger.md index fa81c7c291..8578fe621e 100644 --- a/docs/design-docs/logger.md +++ b/docs/design-docs/logger.md @@ -12,7 +12,7 @@ Since there is a single controller, the single process running the main training loop will gather the metrics and do the logging. -To handle multiple logger backends, we will have a {py:class}`LoggerInterface ` interface that the {py:class}`TensorboardLogger ` and {py:class}`WandbLogger ` will implement: +To handle multiple logger backends, we will have a {py:class}`LoggerInterface ` interface that the {py:class}`TensorboardLogger ` and {py:class}`WandbLogger ` will implement: ```python class LoggerInterface(ABC): @@ -29,7 +29,7 @@ class LoggerInterface(ABC): pass ``` -A {py:class}`Logger ` wrapper class will also implement {py:class}`LoggerInterface ` and will contain a list of loggers it delegates to when writing logs. This will be the main class the user uses in the training loop. Usage example: +A {py:class}`Logger ` wrapper class will also implement {py:class}`LoggerInterface ` and will contain a list of loggers it delegates to when writing logs. This will be the main class the user uses in the training loop. Usage example: ```python # Initialize logger with both wandb and tensorboard enabled @@ -80,7 +80,7 @@ When enabled, the pretty logging will generate formatted text similar to: ## GPU Metric Logging -Reinforcer monitors GPU memory and utilization through [system metrics](https://docs.ray.io/en/latest/ray-observability/reference/system-metrics.html#system-metrics) exposed by Ray nodes. While Ray makes these metrics available for tools like Prometheus, Reinforcer directly polls GPU memory and utilization data and logs them to TensorBoard and/or Weights & Biases. 
+NeMo-RL monitors GPU memory and utilization through [system metrics](https://docs.ray.io/en/latest/ray-observability/reference/system-metrics.html#system-metrics) exposed by Ray nodes. While Ray makes these metrics available for tools like Prometheus, NeMo-RL directly polls GPU memory and utilization data and logs them to TensorBoard and/or Weights & Biases. This approach allows us to offer the same GPU metric tracking on all loggers (not just wandb) and simplifies the implementation greatly. diff --git a/docs/design-docs/padding.md b/docs/design-docs/padding.md index d5949cf3b5..219e91573f 100644 --- a/docs/design-docs/padding.md +++ b/docs/design-docs/padding.md @@ -1,12 +1,12 @@ -# Padding in NeMo Reinforcer +# Padding in NeMo RL ## Overview -This document explains padding in NeMo Reinforcer and why consistent padding is critical for the framework. +This document explains padding in NeMo RL and why consistent padding is critical for the framework. ## Padding Approach -NeMo Reinforcer uses **right padding** for all tensor operations, where padding tokens are added to the right/end of sequences: +NeMo RL uses **right padding** for all tensor operations, where padding tokens are added to the right/end of sequences: ``` [101, 2054, 2003, 0, 0] # Length 3 @@ -37,12 +37,12 @@ Corresponding logprobs: ## Verifying Right Padding -NeMo Reinforcer provides utilities to verify correct padding: +NeMo RL provides utilities to verify correct padding: ```{testcode} import torch -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict -from nemo_reinforcer.models.generation.interfaces import verify_right_padding +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.models.generation.interfaces import verify_right_padding # For input data (BatchedDataDict containing input_ids and input_lengths) input_data = BatchedDataDict({ @@ -78,7 +78,7 @@ if not is_right_padded: :hide: ``` -The {py:class}`verify_right_padding() ` function checks 
that: +The {py:class}`verify_right_padding() ` function checks that: 1. All padding (zeros or padding token provided by the user) appears after valid tokens 2. The padding starts at the position specified by the length tensor @@ -93,6 +93,6 @@ The function automatically detects whether you're passing input or output data: 2. **Track Length Tensors**: Include appropriate length tensors with your data -3. **Verify Padding**: Use {py:class}`verify_right_padding() ` when in doubt +3. **Verify Padding**: Use {py:class}`verify_right_padding() ` when in doubt 4. **Mask Padding in Operations**: Use lengths to exclude padding tokens from loss calculations diff --git a/docs/design-docs/uv.md b/docs/design-docs/uv.md index 47e4a09c05..12d8368501 100644 --- a/docs/design-docs/uv.md +++ b/docs/design-docs/uv.md @@ -1,10 +1,10 @@ -# uv in NeMo-Reinforcer +# uv in NeMo-RL -Using `uv` for Dependency Management in NeMo-Reinforcer +Using `uv` for Dependency Management in NeMo-RL ## Overview -`uv` is an incredible tool that simplifies our workflow and is blazingly fast because it's written in Rust. This document outlines why we've adopted `uv` for package management in our repository, particularly for NeMo Reinforcer, and how it helps us manage dependencies across Ray clusters. +`uv` is an incredible tool that simplifies our workflow and is blazingly fast because it's written in Rust. This document outlines why we've adopted `uv` for package management in our repository, particularly for NeMo RL, and how it helps us manage dependencies across Ray clusters. ## Why `uv`? 
@@ -32,7 +32,7 @@ Using `uv` for Dependency Management in NeMo-Reinforcer - Allows us to define different [dependency groups](https://docs.astral.sh/uv/concepts/projects/dependencies/#dependency-groups) and [extras](https://docs.astral.sh/uv/concepts/projects/dependencies/#optional-dependencies) and select which ones we need dynamically - Reduces infrastructure complexity and maintenance overhead -## Implementation in NeMo Reinforcer +## Implementation in NeMo RL ### Worker Configuration @@ -40,16 +40,16 @@ In our codebase, workers (classes decorated with `@ray.remote`, e.g., `HFPolicyW ### Supported Python Executables -We provide several predefined Python executable configurations in {py:class}`PY_EXECUTABLES `: +We provide several predefined Python executable configurations in {py:class}`PY_EXECUTABLES `: ```python class PY_EXECUTABLES: SYSTEM = sys.executable - # Use NeMo-Reinforcer direct dependencies. + # Use NeMo-RL direct dependencies. BASE = "uv run --locked" - # Use NeMo-Reinforcer direct dependencies and vllm. + # Use NeMo-RL direct dependencies and vllm. VLLM = "uv run --locked --extra vllm" ``` @@ -57,18 +57,18 @@ To ensure consistent dependencies between actors, we run with `--locked` to make ### Customization -If you need a different Python executable configuration, you can override the default one by passing your own in {py:class}`RayWorkerBuilder.__call__ `. This provides flexibility for special use cases without modifying the core configurations. +If you need a different Python executable configuration, you can override the default one by passing your own in {py:class}`RayWorkerBuilder.__call__ `. This provides flexibility for special use cases without modifying the core configurations. ## How It Works -When a Reinforcer job is started: +When a NeMo-RL job is started: -1. The driver script creates several {py:class}`RayWorkerGroup `s. -2. Each worker group will create their workers which are wrapped in a {py:class}`RayWorkerBuilder ` +1. 
The driver script creates several {py:class}`RayWorkerGroup `s. +2. Each worker group will create their workers which are wrapped in a {py:class}`RayWorkerBuilder ` 3. Before the worker class is instantiated by the `RayWorkerBuilder`, if (1) `DEFAULT_PY_EXECUTABLE` is defined on the worker class (decorated with `@ray.remote`) and (2) it starts with `uv`; a `venv` is created with all the dependencies it needs and the `runtime_env["py_executable"]` is replaced with the `venv`'s python interpreter. This approach allows a fast start-up and maintains dependency isolation. It also has the added benefit of having all the virtual environments local under `./venvs`. ## Conclusion -Using `uv` for dependency management in NeMo Reinforcer provides us with a fast, flexible, and reliable way to handle Python dependencies across distributed Ray clusters. It eliminates many of the traditional pain points of dependency management in distributed systems while enabling heterogeneous environments that can be tailored to specific workloads. +Using `uv` for dependency management in NeMo RL provides us with a fast, flexible, and reliable way to handle Python dependencies across distributed Ray clusters. It eliminates many of the traditional pain points of dependency management in distributed systems while enabling heterogeneous environments that can be tailored to specific workloads. diff --git a/docs/docker.md b/docs/docker.md index 5ea3581ea3..fd42a5b404 100644 --- a/docs/docker.md +++ b/docs/docker.md @@ -6,7 +6,7 @@ If you only need the base image with ray + uv, you can build it like so: ```sh cd docker/ -docker buildx build --target base -t reinforcer -f Dockerfile .. +docker buildx build --target base -t nemo_rl -f Dockerfile .. ``` This is **our recommendation** as it is a small image and allows you to specify your python dependencies at runtime. 
@@ -17,7 +17,7 @@ The docker image build without a target stage will include all of the default de ```sh cd docker/ -docker buildx build -t reinforcer -f Dockerfile .. +docker buildx build -t nemo_rl -f Dockerfile .. ``` This image sets up the python environment for you, so you do not have to use `uv` if you don't need diff --git a/docs/documentation.md b/docs/documentation.md index c94239f213..df285cca68 100644 --- a/docs/documentation.md +++ b/docs/documentation.md @@ -9,7 +9,7 @@ ## Building -The following sections describe how to set up and build the NeMo-Reinforcer documentation. +The following sections describe how to set up and build the NeMo-RL documentation. Switch to the documentation source folder and generate HTML output. @@ -62,7 +62,7 @@ def add(x: int, y: int) -> int: Examples: ```{doctest} - >>> from nemo_reinforcer.made_up_package import add + >>> from nemo_rl.made_up_package import add >>> add(1, 2) 3 ``` diff --git a/docs/guides/dpo.md b/docs/guides/dpo.md index 17e3bd303f..6c6ed62833 100644 --- a/docs/guides/dpo.md +++ b/docs/guides/dpo.md @@ -1,4 +1,4 @@ -# Direct Preference Optimization in Reinforcer +# Direct Preference Optimization in NeMo-RL [Direct Preference Optimization (DPO)](https://arxiv.org/pdf/2305.18290) is an RL-free alignment algorithm that operates on preference data. Given a prompt and a pair of chosen and rejected responses, DPO aims to increase the probability of the chosen response and decrease the probability of the rejected response relative to a frozen reference model. The actor is initialized using the reference model. For more details, refer to the @@ -16,7 +16,7 @@ If not specified, `config` will default to [examples/configs/dpo.yaml](../../exa ## Configuration -Reinforcer allows users to configure DPO experiments using `yaml` config files. An example DPO configuration file can be found [here](../../examples/configs/dpo.yaml). +NeMo-RL allows users to configure DPO experiments using `yaml` config files. 
An example DPO configuration file can be found [here](../../examples/configs/dpo.yaml). To override a value in the config, either update the value in the `yaml` file directly, or pass the override via the command line. For example: @@ -32,7 +32,7 @@ uv run examples/run_dpo.py \ ## Datasets -Each class representing a Reinforcer DPO dataset is expected to have the following attributes: +Each class representing a NeMo-RL DPO dataset is expected to have the following attributes: 1. `formatted_ds`: The dictionary of formatted datasets. This dictionary should contain `train` and `validation` splits, and each split should conform to the format described below. 2. `task_spec`: The `TaskDataSpec` for this dataset. This should specify the name you choose for this dataset. @@ -41,7 +41,7 @@ DPO datasets are expected to follow a specific format with three key fields: - `chosen_response`: The preferred/winning response - `rejected_response`: The non-preferred/losing response -[data/hf_datasets/helpsteer3.py](../../nemo_reinforcer/data/hf_datasets/helpsteer3.py) provides an example of how to format data for DPO: +[data/hf_datasets/helpsteer3.py](../../nemo_rl/data/hf_datasets/helpsteer3.py) provides an example of how to format data for DPO: ```python def format_helpsteer3(data): @@ -66,7 +66,7 @@ def format_helpsteer3(data): } ``` -We also provide a [DPODataset](../../nemo_reinforcer/data/hf_datasets/dpo.py) class that is compatible with jsonl-formatted preference datsets. This class assumes train and validation datasets have been split and processed into the expected format offline. The jsonl files should consist of examples with `prompt`, `chosen_response`, and `rejected_response` keys. +We also provide a [DPODataset](../../nemo_rl/data/hf_datasets/dpo.py) class that is compatible with jsonl-formatted preference datasets. This class assumes train and validation datasets have been split and processed into the expected format offline.
The jsonl files should consist of examples with `prompt`, `chosen_response`, and `rejected_response` keys. ## Adding Custom DPO Datasets @@ -78,7 +78,7 @@ Here's a minimal example which simply re-keys an existing jsonl dataset: ```{testcode} from datasets import load_dataset -from nemo_reinforcer.data.interfaces import TaskDataSpec +from nemo_rl.data.interfaces import TaskDataSpec from docs.helpers import make_dpo_dataset class CustomDPODataset: @@ -158,7 +158,7 @@ First train example rejected response: 5 ## DPO-Specific Parameters -The DPO implementation in Reinforcer supports several key parameters that can be adjusted: +The DPO implementation in NeMo-RL supports several key parameters that can be adjusted: - `dpo.reference_policy_kl_penalty`: Controls the strength of the KL penalty term - `dpo.preference_loss_weight`: Weight for the preference loss diff --git a/docs/guides/grpo.md b/docs/guides/grpo.md index 716e609642..62dcd406f3 100644 --- a/docs/guides/grpo.md +++ b/docs/guides/grpo.md @@ -1,4 +1,4 @@ -# An in-depth walkthrough of GRPO in Reinforcer +# An in-depth walkthrough of GRPO in NeMo-RL ## Quickstart: Launch a GRPO Run @@ -28,7 +28,7 @@ In this guide, we'll walk through how we handle We support training with multiple RL "Environments" at the same time. -An [Environment](../../nemo_reinforcer/environments/interfaces.py) is an object that accepts a state/action history and returns an update state and rewards for the step. They run as Ray Remote Actors. Example [MathEnvironment](../../nemo_reinforcer/environments/math_environment.py). +An [Environment](../../nemo_rl/environments/interfaces.py) is an object that accepts a state/action history and returns an updated state and rewards for the step. They run as Ray Remote Actors. Example [MathEnvironment](../../nemo_rl/environments/math_environment.py).
To support this, we need to know: @@ -38,7 +38,7 @@ To support this, we need to know: #### Common Data Format -We define a [DatumSpec](../../nemo_reinforcer/data/interfaces.py) that holds all relevant information for each training example: +We define a [DatumSpec](../../nemo_rl/data/interfaces.py) that holds all relevant information for each training example: ```python class DatumSpec(TypedDict): @@ -54,7 +54,7 @@ class DatumSpec(TypedDict): #### Data Processors We name all distinct "environments your model wants to optimize against" "tasks". So you might define a "math" task or a "code" task. -For each task, you should provide a data processor that reads from your dataset and returns a [DatumSpec](../../nemo_reinforcer/data/interfaces.py) +For each task, you should provide a data processor that reads from your dataset and returns a [DatumSpec](../../nemo_rl/data/interfaces.py) ```python def my_data_processor( @@ -100,18 +100,18 @@ Notice that you provide a mapping of tasks to their processors so the dataset kn ### Policy Model -We define a {py:class}`PolicyInterface]() ` that contains everything you need to train a Policy model. +We define a {py:class}`PolicyInterface]() ` that contains everything you need to train a Policy model. -This Policy object holds a [RayWorkerGroup](../../nemo_reinforcer/distributed/worker_groups.py) of SPMD (1 proc/gpu) processes that run HF/MCore, all coordinated by this object so it appears to you like 1 GPU! +This Policy object holds a [RayWorkerGroup](../../nemo_rl/distributed/worker_groups.py) of SPMD (1 proc/gpu) processes that run HF/MCore, all coordinated by this object so it appears to you like 1 GPU! ### Fast Generation -We support vLLM through the [VllmGeneration](../../nemo_reinforcer/models/generation/vllm.py) class right now. +We support vLLM through the [VllmGeneration](../../nemo_rl/models/generation/vllm.py) class right now. 
-The function [grpo_train](../../nemo_reinforcer/algorithms/grpo.py) contains the core GRPO training loop. +The function [grpo_train](../../nemo_rl/algorithms/grpo.py) contains the core GRPO training loop. ### Loss -We use the [ClippedPGLossFn](../../nemo_reinforcer/algorithms/loss_functions.py) to calculate the loss for GRPO. Formally, +We use the [ClippedPGLossFn](../../nemo_rl/algorithms/loss_functions.py) to calculate the loss for GRPO. Formally, $$ L(\theta) = E_{x \sim \pi_{\theta_{\text{old}}}} \Big[ \min \Big(\frac{\pi_\theta(x)}{\pi_{\theta_{\text{old}}}(x)}A_t, \text{clip} \big( \frac{\pi_\theta(x)}{\pi_{\theta_{\text{old}}}(x)}, 1 - \varepsilon, 1 + \varepsilon \big) A_t \Big) \Big] - \beta D_{\text{KL}} (\pi_\theta \| \pi_\text{ref}) diff --git a/docs/guides/sft.md b/docs/guides/sft.md index 4d22b71427..ff2fd196d5 100644 --- a/docs/guides/sft.md +++ b/docs/guides/sft.md @@ -1,4 +1,4 @@ -# Supervised Fine-tuning in Reinforcer +# Supervised Fine-tuning in NeMo-RL ## Launch an SFT Run @@ -12,7 +12,7 @@ If not specified, `config` will default to [examples/configs/sft.yaml](../../exa ## Configuration -Reinforcer allows users to configure experiments using `yaml` config files. An example SFT configuration file can be found [here](../../examples/configs/sft.yaml). +NeMo-RL allows users to configure experiments using `yaml` config files. An example SFT configuration file can be found [here](../../examples/configs/sft.yaml). To override a value in the config, either update the value in the `yaml` file directly, or pass the override via the command line. For example: @@ -25,11 +25,11 @@ uv run examples/run_sft.py \ ## Datasets -SFT datasets in Reinforcer are encapsulated using classes. Each SFT data class is expected to have the following attributes: +SFT datasets in NeMo-RL are encapsulated using classes. Each SFT data class is expected to have the following attributes: 1. `formatted_ds`: The dictionary of formatted datasets. 
This dictionary should contain `train` and `validation` splits, and each split should conform to the format described below. 2. `task_spec`: The `TaskDataSpec` for this dataset. This should specify the name you choose for this dataset. -SFT datasets are expected to follow the HuggingFace chat format. Refer to the [chat dataset document](../design-docs/chat-datasets.md) for details. If your data is not in the correct format, simply write a preprocessing script to convert the data into this format. [data/hf_datasets/squad.py](../../nemo_reinforcer/data/hf_datasets/squad.py) has an example: +SFT datasets are expected to follow the HuggingFace chat format. Refer to the [chat dataset document](../design-docs/chat-datasets.md) for details. If your data is not in the correct format, simply write a preprocessing script to convert the data into this format. [data/hf_datasets/squad.py](../../nemo_rl/data/hf_datasets/squad.py) has an example: ```python def format_squad(data): @@ -51,7 +51,7 @@ def format_squad(data): } ``` -Reinforcer SFT uses HuggingFace chat templates to format the individual examples. Three types of chat templates are supported, which can be configured via `tokenizer.chat_template` in your yaml config (see [sft.yaml](../../examples/configs/sft.yaml) for an example): +NeMo-RL SFT uses HuggingFace chat templates to format the individual examples. Three types of chat templates are supported, which can be configured via `tokenizer.chat_template` in your yaml config (see [sft.yaml](../../examples/configs/sft.yaml) for an example): 1. Apply the tokenizer's default chat template. To use the tokenizer's default, either omit `tokenizer.chat_template` from the config altogether, or set `tokenizer.chat_template="default"`. 2. Use a "passthrough" template which simply concatenates all messages. This is desirable if the chat template has been applied to your dataset as an offline preprocessing step. 
In this case, you should set `tokenizer.chat_template` to None as follows: @@ -67,7 +67,7 @@ Reinforcer SFT uses HuggingFace chat templates to format the individual examples ``` -By default, NeMo-Reinforcer has support for `Squad` and `OpenAssistant` datasets. Both of these datasets are downloaded from HuggingFace and preprocessed on-the-fly, so there's no need to provide a path to any datasets on disk. +By default, NeMo-RL has support for `Squad` and `OpenAssistant` datasets. Both of these datasets are downloaded from HuggingFace and preprocessed on-the-fly, so there's no need to provide a path to any datasets on disk. Adding a new dataset is a straightforward process. As long as your custom dataset has the `formatted_ds` and `task_spec` attributes described above, it can serve as a drop-in replacement for Squad and OpenAssistant. \ No newline at end of file diff --git a/docs/local-workstation.md b/docs/local-workstation.md index 3e252694a0..482b41c5ad 100644 --- a/docs/local-workstation.md +++ b/docs/local-workstation.md @@ -2,7 +2,7 @@ ## Launching Locally -When launching examples locally with `uv`, {py:class}`init_ray() ` will first attempt to connect to an existing cluster. If none is found, it will start a local one and connect to it using all available GPU and CPU resources on your node. +When launching examples locally with `uv`, {py:class}`init_ray() ` will first attempt to connect to an existing cluster. If none is found, it will start a local one and connect to it using all available GPU and CPU resources on your node. To launch a job outside of a container, simply run: @@ -14,7 +14,7 @@ In the logs, you will see that Ray has started a local cluster instance, along w ``` 2025-03-17 13:37:45,360 INFO worker.py:1841 -- Started a local Ray instance. ... 
-INFO:nemo_reinforcer.distributed.virtual_cluster:Started local cluster with: {'node:__internal_head__': 1.0, 'CPU': 24.0, 'object_store_memory': 80448493977.0, 'accelerator_type:RTX': 1.0, 'memory': 177713152615.0, 'GPU': 1.0, 'node:10.0.0.1': 1.0} +INFO:nemo_rl.distributed.virtual_cluster:Started local cluster with: {'node:__internal_head__': 1.0, 'CPU': 24.0, 'object_store_memory': 80448493977.0, 'accelerator_type:RTX': 1.0, 'memory': 177713152615.0, 'GPU': 1.0, 'node:10.0.0.1': 1.0} ``` To control the GPUs ray uses locally more granularly, please use `CUDA_VISIBLE_DEVICES`: diff --git a/docs/testing.md b/docs/testing.md index 466789dfd6..672bdacc82 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -1,4 +1,4 @@ -# Testing Reinforcer +# Testing NeMo-RL ## Unit Tests diff --git a/examples/configs/grpo_math_1B.yaml b/examples/configs/grpo_math_1B.yaml index 7823c8b64c..dac6be0a7e 100644 --- a/examples/configs/grpo_math_1B.yaml +++ b/examples/configs/grpo_math_1B.yaml @@ -28,7 +28,7 @@ checkpointing: save_period: 10 policy: - # Qwen/Qwen2.5-1.5B has tied weights which are only supported with dtensor policy with tp size 1 (https://github.com/NVIDIA/reinforcer/issues/227) + # Qwen/Qwen2.5-1.5B has tied weights which are only supported with dtensor policy with tp size 1 (https://github.com/NVIDIA/nemo-rl/issues/227) model_name: "Qwen/Qwen2.5-1.5B" tokenizer: name: ${policy.model_name} ## specify if you'd like to use a tokenizer different from the model's default diff --git a/examples/configs/grpo_sliding_puzzle.yaml b/examples/configs/grpo_sliding_puzzle.yaml index 27ee2cae46..f7f0a53425 100644 --- a/examples/configs/grpo_sliding_puzzle.yaml +++ b/examples/configs/grpo_sliding_puzzle.yaml @@ -24,7 +24,7 @@ policy: max_new_tokens: ${policy.max_total_sequence_length} temperature: 1.0 # Setting top_p/top_k to 0.999/10000 to strip out Qwen's special/illegal tokens - # https://github.com/NVIDIA/reinforcer/issues/237 + # https://github.com/NVIDIA/nemo-rl/issues/237 
top_p: 0.999 top_k: 10000 stop_token_ids: null diff --git a/examples/convert_dcp_to_hf.py b/examples/convert_dcp_to_hf.py index b314d93d7a..c185d424bf 100644 --- a/examples/convert_dcp_to_hf.py +++ b/examples/convert_dcp_to_hf.py @@ -16,7 +16,7 @@ import json import os import torch -from nemo_reinforcer.utils.native_checkpoint import convert_dcp_to_hf +from nemo_rl.utils.native_checkpoint import convert_dcp_to_hf def parse_args(): @@ -51,7 +51,7 @@ def main(): model_name_or_path = config["policy"]["model_name"] # TODO: After the following PR gets merged: - # https://github.com/NVIDIA/reinforcer/pull/148/files + # https://github.com/NVIDIA/nemo-rl/pull/148/files # tokenizer should be copied from policy/tokenizer/* instead of relying on the model name # We can expose a arg at the top level --tokenizer_path to plumb that through. # This is more stable than relying on the current NeMo-RL get_tokenizer() which can diff --git a/examples/run_dpo.py b/examples/run_dpo.py index f780933310..93de160b6e 100644 --- a/examples/run_dpo.py +++ b/examples/run_dpo.py @@ -20,17 +20,17 @@ from omegaconf import OmegaConf -from nemo_reinforcer.algorithms.dpo import MasterConfig, dpo_train, setup -from nemo_reinforcer.algorithms.utils import get_tokenizer -from nemo_reinforcer.distributed.virtual_cluster import init_ray -from nemo_reinforcer.utils.config import load_config, parse_hydra_overrides -from nemo_reinforcer.utils.logger import get_next_experiment_dir -from nemo_reinforcer.data import DataConfig, hf_datasets -from nemo_reinforcer.data.datasets import AllTaskProcessedDataset -from nemo_reinforcer.data.interfaces import TaskDataSpec, DatumSpec -from nemo_reinforcer.data.llm_message_utils import get_formatted_message_log +from nemo_rl.algorithms.dpo import MasterConfig, dpo_train, setup +from nemo_rl.algorithms.utils import get_tokenizer +from nemo_rl.distributed.virtual_cluster import init_ray +from nemo_rl.utils.config import load_config, parse_hydra_overrides +from 
nemo_rl.utils.logger import get_next_experiment_dir +from nemo_rl.data import DataConfig, hf_datasets +from nemo_rl.data.datasets import AllTaskProcessedDataset +from nemo_rl.data.interfaces import TaskDataSpec, DatumSpec +from nemo_rl.data.llm_message_utils import get_formatted_message_log from transformers import AutoTokenizer -from nemo_reinforcer.models.policy import PolicyConfig +from nemo_rl.models.policy import PolicyConfig def parse_args(): @@ -61,7 +61,7 @@ def dpo_preprocessor( Examples: ```{doctest} >>> from transformers import AutoTokenizer - >>> from nemo_reinforcer.data.interfaces import TaskDataSpec + >>> from nemo_rl.data.interfaces import TaskDataSpec >>> >>> # Initialize tokenizer and task spec >>> tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct") diff --git a/examples/run_eval.py b/examples/run_eval.py index 9c9cb8d9a3..abe85bcca8 100644 --- a/examples/run_eval.py +++ b/examples/run_eval.py @@ -24,15 +24,15 @@ from transformers import AutoTokenizer from examples.run_grpo_math import math_data_processor -from nemo_reinforcer.algorithms.utils import get_tokenizer -from nemo_reinforcer.data import MathDataConfig -from nemo_reinforcer.data.datasets import AllTaskProcessedDataset -from nemo_reinforcer.data.interfaces import TaskDataSpec -from nemo_reinforcer.data.llm_message_utils import remap_dataset_keys -from nemo_reinforcer.distributed.virtual_cluster import init_ray -from nemo_reinforcer.environments.math_environment import MathEnvironment -from nemo_reinforcer.evals.eval import MasterConfig, run_env_eval, setup -from nemo_reinforcer.models.generation.interfaces import configure_generation_config +from nemo_rl.algorithms.utils import get_tokenizer +from nemo_rl.data import MathDataConfig +from nemo_rl.data.datasets import AllTaskProcessedDataset +from nemo_rl.data.interfaces import TaskDataSpec +from nemo_rl.data.llm_message_utils import remap_dataset_keys +from nemo_rl.distributed.virtual_cluster import init_ray +from 
nemo_rl.environments.math_environment import MathEnvironment +from nemo_rl.evals.eval import MasterConfig, run_env_eval, setup +from nemo_rl.models.generation.interfaces import configure_generation_config def parse_args(): diff --git a/examples/run_grpo_math.py b/examples/run_grpo_math.py index 7a686d6a9e..2e70cc889b 100644 --- a/examples/run_grpo_math.py +++ b/examples/run_grpo_math.py @@ -21,17 +21,17 @@ from omegaconf import OmegaConf from transformers import AutoTokenizer -from nemo_reinforcer.algorithms.grpo import MasterConfig, grpo_train, setup -from nemo_reinforcer.algorithms.utils import get_tokenizer -from nemo_reinforcer.data import DataConfig -from nemo_reinforcer.data.datasets import AllTaskProcessedDataset -from nemo_reinforcer.data.hf_datasets.openmathinstruct2 import OpenMathInstruct2Dataset -from nemo_reinforcer.data.interfaces import DatumSpec, LLMMessageLogType, TaskDataSpec -from nemo_reinforcer.distributed.virtual_cluster import init_ray -from nemo_reinforcer.environments.math_environment import MathEnvironment -from nemo_reinforcer.models.generation.interfaces import configure_generation_config -from nemo_reinforcer.utils.config import load_config, parse_hydra_overrides -from nemo_reinforcer.utils.logger import get_next_experiment_dir +from nemo_rl.algorithms.grpo import MasterConfig, grpo_train, setup +from nemo_rl.algorithms.utils import get_tokenizer +from nemo_rl.data import DataConfig +from nemo_rl.data.datasets import AllTaskProcessedDataset +from nemo_rl.data.hf_datasets.openmathinstruct2 import OpenMathInstruct2Dataset +from nemo_rl.data.interfaces import DatumSpec, LLMMessageLogType, TaskDataSpec +from nemo_rl.distributed.virtual_cluster import init_ray +from nemo_rl.environments.math_environment import MathEnvironment +from nemo_rl.models.generation.interfaces import configure_generation_config +from nemo_rl.utils.config import load_config, parse_hydra_overrides +from nemo_rl.utils.logger import get_next_experiment_dir def 
parse_args(): @@ -175,7 +175,7 @@ def setup_data(tokenizer: AutoTokenizer, data_config: DataConfig, env_configs): system_prompt_file=data_config["system_prompt_file"], ) - # Load OpenMathInstruct2Dataset using reinforcer datasets + # Load OpenMathInstruct2Dataset using nemo rl datasets if data_config["dataset_name"] == "OpenMathInstruct-2": print(f"Loading nvidia/OpenMathInstruct2Dataset for training and validation") data = OpenMathInstruct2Dataset() diff --git a/examples/run_grpo_sliding_puzzle.py b/examples/run_grpo_sliding_puzzle.py index abd468881f..2076f11fd0 100644 --- a/examples/run_grpo_sliding_puzzle.py +++ b/examples/run_grpo_sliding_puzzle.py @@ -24,21 +24,21 @@ from torch.utils.data import IterableDataset -from nemo_reinforcer.algorithms.grpo import MasterConfig, grpo_train, setup -from nemo_reinforcer.algorithms.utils import get_tokenizer +from nemo_rl.algorithms.grpo import MasterConfig, grpo_train, setup +from nemo_rl.algorithms.utils import get_tokenizer -from nemo_reinforcer.distributed.virtual_cluster import init_ray -from nemo_reinforcer.models.generation.interfaces import configure_generation_config -from nemo_reinforcer.utils.config import load_config, parse_hydra_overrides -from nemo_reinforcer.utils.logger import get_next_experiment_dir +from nemo_rl.distributed.virtual_cluster import init_ray +from nemo_rl.models.generation.interfaces import configure_generation_config +from nemo_rl.utils.config import load_config, parse_hydra_overrides +from nemo_rl.utils.logger import get_next_experiment_dir -from nemo_reinforcer.environments.games.sliding_puzzle import ( +from nemo_rl.environments.games.sliding_puzzle import ( SlidingPuzzleGameLogic, SlidingPuzzleEnv, SlidingPuzzleConfig, SlidingPuzzleMetadata, ) -from nemo_reinforcer.data.interfaces import LLMMessageLogType, DatumSpec +from nemo_rl.data.interfaces import LLMMessageLogType, DatumSpec def parse_args(): diff --git a/examples/run_sft.py b/examples/run_sft.py index 875aa9a000..2b7dd9489f 
100644 --- a/examples/run_sft.py +++ b/examples/run_sft.py @@ -21,15 +21,15 @@ from omegaconf import OmegaConf from transformers import AutoTokenizer -from nemo_reinforcer.algorithms.sft import MasterConfig, sft_train, setup -from nemo_reinforcer.algorithms.utils import get_tokenizer -from nemo_reinforcer.data import DataConfig, hf_datasets -from nemo_reinforcer.data.datasets import AllTaskProcessedDataset -from nemo_reinforcer.data.interfaces import TaskDataSpec, DatumSpec -from nemo_reinforcer.data.llm_message_utils import get_formatted_message_log -from nemo_reinforcer.distributed.virtual_cluster import init_ray -from nemo_reinforcer.utils.config import load_config -from nemo_reinforcer.utils.logger import get_next_experiment_dir +from nemo_rl.algorithms.sft import MasterConfig, sft_train, setup +from nemo_rl.algorithms.utils import get_tokenizer +from nemo_rl.data import DataConfig, hf_datasets +from nemo_rl.data.datasets import AllTaskProcessedDataset +from nemo_rl.data.interfaces import TaskDataSpec, DatumSpec +from nemo_rl.data.llm_message_utils import get_formatted_message_log +from nemo_rl.distributed.virtual_cluster import init_ray +from nemo_rl.utils.config import load_config +from nemo_rl.utils.logger import get_next_experiment_dir def parse_args(): diff --git a/nemo_reinforcer/__init__.py b/nemo_rl/__init__.py similarity index 86% rename from nemo_reinforcer/__init__.py rename to nemo_rl/__init__.py index 457028a211..1606956b87 100644 --- a/nemo_reinforcer/__init__.py +++ b/nemo_rl/__init__.py @@ -1,5 +1,5 @@ import os -from nemo_reinforcer.package_info import ( +from nemo_rl.package_info import ( __contact_emails__, __contact_names__, __description__, diff --git a/nemo_reinforcer/algorithms/__init__.py b/nemo_rl/algorithms/__init__.py similarity index 100% rename from nemo_reinforcer/algorithms/__init__.py rename to nemo_rl/algorithms/__init__.py diff --git a/nemo_reinforcer/algorithms/dpo.py b/nemo_rl/algorithms/dpo.py similarity index 95% rename 
from nemo_reinforcer/algorithms/dpo.py rename to nemo_rl/algorithms/dpo.py index c4a6af834a..1545c050b6 100644 --- a/nemo_reinforcer/algorithms/dpo.py +++ b/nemo_rl/algorithms/dpo.py @@ -23,21 +23,21 @@ import numpy as np import torch from torchdata.stateful_dataloader import StatefulDataLoader -from nemo_reinforcer.algorithms.loss_functions import ( +from nemo_rl.algorithms.loss_functions import ( DPOLossFn, ) -from nemo_reinforcer.algorithms.utils import set_seed, get_tokenizer -from nemo_reinforcer.data import DataConfig -from nemo_reinforcer.data.datasets import AllTaskProcessedDataset, dpo_collate_fn -from nemo_reinforcer.data.interfaces import TaskDataSpec -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict -from nemo_reinforcer.distributed.virtual_cluster import ClusterConfig, RayVirtualCluster -from nemo_reinforcer.models.interfaces import PolicyInterface -from nemo_reinforcer.models.policy.hf_policy import HfPolicy -from nemo_reinforcer.models.policy import PolicyConfig -from nemo_reinforcer.utils.checkpoint import CheckpointManager, CheckpointingConfig -from nemo_reinforcer.utils.logger import Logger, LoggerConfig -from nemo_reinforcer.utils.timer import Timer +from nemo_rl.algorithms.utils import set_seed, get_tokenizer +from nemo_rl.data import DataConfig +from nemo_rl.data.datasets import AllTaskProcessedDataset, dpo_collate_fn +from nemo_rl.data.interfaces import TaskDataSpec +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.virtual_cluster import ClusterConfig, RayVirtualCluster +from nemo_rl.models.interfaces import PolicyInterface +from nemo_rl.models.policy.hf_policy import HfPolicy +from nemo_rl.models.policy import PolicyConfig +from nemo_rl.utils.checkpoint import CheckpointManager, CheckpointingConfig +from nemo_rl.utils.logger import Logger, LoggerConfig +from nemo_rl.utils.timer import Timer class DPOSaveState(TypedDict): @@ -71,7 +71,7 @@ class DPOConfig(TypedDict): 
preference_average_log_probs: bool sft_average_log_probs: bool ## TODO(@ashors) support other loss functions - ## https://github.com/NVIDIA/reinforcer/issues/193 + ## https://github.com/NVIDIA/nemo-rl/issues/193 # preference_loss: str # gt_reward_scale: float preference_loss_weight: float diff --git a/nemo_reinforcer/algorithms/grpo.py b/nemo_rl/algorithms/grpo.py similarity index 95% rename from nemo_reinforcer/algorithms/grpo.py rename to nemo_rl/algorithms/grpo.py index 3db313fcd2..88fd3f803b 100644 --- a/nemo_reinforcer/algorithms/grpo.py +++ b/nemo_rl/algorithms/grpo.py @@ -21,48 +21,48 @@ from torchdata.stateful_dataloader import StatefulDataLoader from transformers import AutoTokenizer -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict -from nemo_reinforcer.algorithms.utils import calculate_baseline_and_std_per_prompt +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.algorithms.utils import calculate_baseline_and_std_per_prompt -from nemo_reinforcer.environments.interfaces import ( +from nemo_rl.environments.interfaces import ( EnvironmentInterface, EnvironmentReturn, ) -from nemo_reinforcer.distributed.virtual_cluster import RayVirtualCluster -from nemo_reinforcer.data.interfaces import ( +from nemo_rl.distributed.virtual_cluster import RayVirtualCluster +from nemo_rl.data.interfaces import ( DatumSpec, LLMMessageLogType, FlatMessagesType, ) -from nemo_reinforcer.data.datasets import AllTaskProcessedDataset, rl_collate_fn -from nemo_reinforcer.models.policy.hf_policy import HfPolicy -from nemo_reinforcer.models.generation.vllm import VllmGeneration -from nemo_reinforcer.algorithms.loss_functions import ( +from nemo_rl.data.datasets import AllTaskProcessedDataset, rl_collate_fn +from nemo_rl.models.policy.hf_policy import HfPolicy +from nemo_rl.models.generation.vllm import VllmGeneration +from nemo_rl.algorithms.loss_functions import ( ClippedPGLossConfig, ClippedPGLossDataDict, ClippedPGLossFn, ) -from 
nemo_reinforcer.algorithms.interfaces import LossFunction -from nemo_reinforcer.data import DataConfig -from nemo_reinforcer.data.llm_message_utils import ( +from nemo_rl.algorithms.interfaces import LossFunction +from nemo_rl.data import DataConfig +from nemo_rl.data.llm_message_utils import ( get_keys_from_message_log, batched_message_log_to_flat_message, ) -from nemo_reinforcer.utils.logger import ( +from nemo_rl.utils.logger import ( print_message_log_samples, ) -from nemo_reinforcer.distributed.virtual_cluster import ClusterConfig -from nemo_reinforcer.environments.math_environment import MathEnvConfig -from nemo_reinforcer.models.generation.interfaces import ( +from nemo_rl.distributed.virtual_cluster import ClusterConfig +from nemo_rl.environments.math_environment import MathEnvConfig +from nemo_rl.models.generation.interfaces import ( GenerationInterface, GenerationDatumSpec, ) -from nemo_reinforcer.models.interfaces import PolicyInterface -from nemo_reinforcer.models.policy import PolicyConfig -from nemo_reinforcer.utils.logger import Logger, LoggerConfig -from nemo_reinforcer.utils.timer import Timer -from nemo_reinforcer.utils.checkpoint import CheckpointManager, CheckpointingConfig -from nemo_reinforcer.experience.rollouts import run_multi_turn_rollout +from nemo_rl.models.interfaces import PolicyInterface +from nemo_rl.models.policy import PolicyConfig +from nemo_rl.utils.logger import Logger, LoggerConfig +from nemo_rl.utils.timer import Timer +from nemo_rl.utils.checkpoint import CheckpointManager, CheckpointingConfig +from nemo_rl.experience.rollouts import run_multi_turn_rollout # =============================================================================== diff --git a/nemo_reinforcer/algorithms/interfaces.py b/nemo_rl/algorithms/interfaces.py similarity index 96% rename from nemo_reinforcer/algorithms/interfaces.py rename to nemo_rl/algorithms/interfaces.py index b0290ce17f..b36e266104 100644 --- a/nemo_reinforcer/algorithms/interfaces.py +++ 
b/nemo_rl/algorithms/interfaces.py @@ -16,7 +16,7 @@ import torch -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.batched_data_dict import BatchedDataDict class LossFunction(Protocol): diff --git a/nemo_reinforcer/algorithms/loss_functions.py b/nemo_rl/algorithms/loss_functions.py similarity index 98% rename from nemo_reinforcer/algorithms/loss_functions.py rename to nemo_rl/algorithms/loss_functions.py index 0370dcfa3a..26441fe616 100644 --- a/nemo_reinforcer/algorithms/loss_functions.py +++ b/nemo_rl/algorithms/loss_functions.py @@ -15,14 +15,14 @@ import torch -from nemo_reinforcer.algorithms.interfaces import LossFunction -from nemo_reinforcer.algorithms.utils import ( +from nemo_rl.algorithms.interfaces import LossFunction +from nemo_rl.algorithms.utils import ( calculate_kl_penalty_joschu2020, masked_mean, ) -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict -from nemo_reinforcer.models.dtensor.parallelize import ( +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.models.dtensor.parallelize import ( get_logprobs_from_vocab_parallel_logits, ) diff --git a/nemo_reinforcer/algorithms/sft.py b/nemo_rl/algorithms/sft.py similarity index 95% rename from nemo_reinforcer/algorithms/sft.py rename to nemo_rl/algorithms/sft.py index 4bcc9a8a41..6400d287ec 100644 --- a/nemo_reinforcer/algorithms/sft.py +++ b/nemo_rl/algorithms/sft.py @@ -20,25 +20,25 @@ import numpy as np import torch from torchdata.stateful_dataloader import StatefulDataLoader -from nemo_reinforcer.algorithms.loss_functions import ( +from nemo_rl.algorithms.loss_functions import ( NLLLoss, ) -from nemo_reinforcer.algorithms.utils import set_seed -from nemo_reinforcer.data import DataConfig -from nemo_reinforcer.data.datasets import AllTaskProcessedDataset, rl_collate_fn -from nemo_reinforcer.data.interfaces import TaskDataSpec -from nemo_reinforcer.data.llm_message_utils import ( +from 
nemo_rl.algorithms.utils import set_seed +from nemo_rl.data import DataConfig +from nemo_rl.data.datasets import AllTaskProcessedDataset, rl_collate_fn +from nemo_rl.data.interfaces import TaskDataSpec +from nemo_rl.data.llm_message_utils import ( add_loss_mask_to_message_log, batched_message_log_to_flat_message, ) -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict -from nemo_reinforcer.distributed.virtual_cluster import ClusterConfig, RayVirtualCluster -from nemo_reinforcer.models.interfaces import PolicyInterface -from nemo_reinforcer.models.policy.hf_policy import HfPolicy -from nemo_reinforcer.models.policy import PolicyConfig -from nemo_reinforcer.utils.checkpoint import CheckpointManager, CheckpointingConfig -from nemo_reinforcer.utils.logger import Logger, LoggerConfig -from nemo_reinforcer.utils.timer import Timer +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.virtual_cluster import ClusterConfig, RayVirtualCluster +from nemo_rl.models.interfaces import PolicyInterface +from nemo_rl.models.policy.hf_policy import HfPolicy +from nemo_rl.models.policy import PolicyConfig +from nemo_rl.utils.checkpoint import CheckpointManager, CheckpointingConfig +from nemo_rl.utils.logger import Logger, LoggerConfig +from nemo_rl.utils.timer import Timer class SFTSaveState(TypedDict): diff --git a/nemo_reinforcer/algorithms/utils.py b/nemo_rl/algorithms/utils.py similarity index 97% rename from nemo_reinforcer/algorithms/utils.py rename to nemo_rl/algorithms/utils.py index e6610367ae..81124ef85e 100644 --- a/nemo_reinforcer/algorithms/utils.py +++ b/nemo_rl/algorithms/utils.py @@ -20,8 +20,8 @@ from torch.masked import as_masked_tensor from transformers import AutoTokenizer -from nemo_reinforcer.data import hf_datasets -from nemo_reinforcer.models.policy import TokenizerConfig +from nemo_rl.data import hf_datasets +from nemo_rl.models.policy import TokenizerConfig def calculate_kl_penalty_joschu2020( @@ 
-157,7 +157,7 @@ def get_tokenizer(tokenizer_config: TokenizerConfig) -> AutoTokenizer: Examples: ```{doctest} >>> from transformers import AutoTokenizer - >>> from nemo_reinforcer.algorithms.utils import get_tokenizer + >>> from nemo_rl.algorithms.utils import get_tokenizer >>> # not specifying a chat template uses the tokenizer's default >>> config = {"name": "meta-llama/Llama-3.2-1B-Instruct"} >>> tokenizer = get_tokenizer(config) diff --git a/nemo_reinforcer/converters/__init__.py b/nemo_rl/converters/__init__.py similarity index 100% rename from nemo_reinforcer/converters/__init__.py rename to nemo_rl/converters/__init__.py diff --git a/nemo_reinforcer/converters/huggingface/__init__.py b/nemo_rl/converters/huggingface/__init__.py similarity index 100% rename from nemo_reinforcer/converters/huggingface/__init__.py rename to nemo_rl/converters/huggingface/__init__.py diff --git a/nemo_reinforcer/converters/huggingface/vllm_export.py b/nemo_rl/converters/huggingface/vllm_export.py similarity index 100% rename from nemo_reinforcer/converters/huggingface/vllm_export.py rename to nemo_rl/converters/huggingface/vllm_export.py diff --git a/nemo_reinforcer/converters/megatron/__init__.py b/nemo_rl/converters/megatron/__init__.py similarity index 100% rename from nemo_reinforcer/converters/megatron/__init__.py rename to nemo_rl/converters/megatron/__init__.py diff --git a/nemo_reinforcer/converters/megatron/vllm_export.py b/nemo_rl/converters/megatron/vllm_export.py similarity index 100% rename from nemo_reinforcer/converters/megatron/vllm_export.py rename to nemo_rl/converters/megatron/vllm_export.py diff --git a/nemo_reinforcer/data/__init__.py b/nemo_rl/data/__init__.py similarity index 100% rename from nemo_reinforcer/data/__init__.py rename to nemo_rl/data/__init__.py diff --git a/nemo_reinforcer/data/datasets.py b/nemo_rl/data/datasets.py similarity index 96% rename from nemo_reinforcer/data/datasets.py rename to nemo_rl/data/datasets.py index 
f872b4e9f5..99de1d6520 100644 --- a/nemo_reinforcer/data/datasets.py +++ b/nemo_rl/data/datasets.py @@ -16,16 +16,16 @@ import torch from datasets import Dataset -from nemo_reinforcer.data.interfaces import ( +from nemo_rl.data.interfaces import ( TaskDataSpec, TaskDataProcessFnCallable, DatumSpec, ) -from nemo_reinforcer.data.llm_message_utils import ( +from nemo_rl.data.llm_message_utils import ( add_loss_mask_to_message_log, batched_message_log_to_flat_message, ) -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.batched_data_dict import BatchedDataDict # TODO @sahilj handle too-long prompts and masking them out throughout the whole process and renormalizing on loss @@ -155,8 +155,8 @@ def eval_collate_fn(data_batch: List[DatumSpec]) -> BatchedDataDict: Examples: ```{doctest} >>> import torch - >>> from nemo_reinforcer.data.datasets import eval_collate_fn - >>> from nemo_reinforcer.data.interfaces import DatumSpec + >>> from nemo_rl.data.datasets import eval_collate_fn + >>> from nemo_rl.data.interfaces import DatumSpec >>> data_batch = [ ... DatumSpec( ... message_log=[{"role": "user", "content": "Hello", "token_ids": torch.tensor([1, 2, 3])}], diff --git a/nemo_reinforcer/data/hf_datasets/__init__.py b/nemo_rl/data/hf_datasets/__init__.py similarity index 65% rename from nemo_reinforcer/data/hf_datasets/__init__.py rename to nemo_rl/data/hf_datasets/__init__.py index c6e7c8c75c..d33cca3ac2 100644 --- a/nemo_reinforcer/data/hf_datasets/__init__.py +++ b/nemo_rl/data/hf_datasets/__init__.py @@ -12,14 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from nemo_reinforcer.data.hf_datasets.chat_templates import COMMON_CHAT_TEMPLATES -from nemo_reinforcer.data.hf_datasets.dpo import DPODataset -from nemo_reinforcer.data.hf_datasets.helpsteer3 import HelpSteer3Dataset -from nemo_reinforcer.data.hf_datasets.oasst import OasstDataset -from nemo_reinforcer.data.hf_datasets.prompt_response_dataset import ( +from nemo_rl.data.hf_datasets.chat_templates import COMMON_CHAT_TEMPLATES +from nemo_rl.data.hf_datasets.dpo import DPODataset +from nemo_rl.data.hf_datasets.helpsteer3 import HelpSteer3Dataset +from nemo_rl.data.hf_datasets.oasst import OasstDataset +from nemo_rl.data.hf_datasets.prompt_response_dataset import ( PromptResponseDataset, ) -from nemo_reinforcer.data.hf_datasets.squad import SquadDataset +from nemo_rl.data.hf_datasets.squad import SquadDataset __all__ = [ "DPODataset", diff --git a/nemo_reinforcer/data/hf_datasets/chat_templates.py b/nemo_rl/data/hf_datasets/chat_templates.py similarity index 100% rename from nemo_reinforcer/data/hf_datasets/chat_templates.py rename to nemo_rl/data/hf_datasets/chat_templates.py diff --git a/nemo_reinforcer/data/hf_datasets/dpo.py b/nemo_rl/data/hf_datasets/dpo.py similarity index 96% rename from nemo_reinforcer/data/hf_datasets/dpo.py rename to nemo_rl/data/hf_datasets/dpo.py index f0cb022498..03d5c7e872 100644 --- a/nemo_reinforcer/data/hf_datasets/dpo.py +++ b/nemo_rl/data/hf_datasets/dpo.py @@ -13,7 +13,7 @@ # limitations under the License. 
from datasets import load_dataset -from nemo_reinforcer.data.interfaces import TaskDataSpec +from nemo_rl.data.interfaces import TaskDataSpec class DPODataset: diff --git a/nemo_reinforcer/data/hf_datasets/helpsteer3.py b/nemo_rl/data/hf_datasets/helpsteer3.py similarity index 96% rename from nemo_reinforcer/data/hf_datasets/helpsteer3.py rename to nemo_rl/data/hf_datasets/helpsteer3.py index 0ad0263c30..73e8828927 100644 --- a/nemo_reinforcer/data/hf_datasets/helpsteer3.py +++ b/nemo_rl/data/hf_datasets/helpsteer3.py @@ -14,7 +14,7 @@ from datasets import load_dataset from absl import logging -from nemo_reinforcer.data.interfaces import TaskDataSpec +from nemo_rl.data.interfaces import TaskDataSpec def format_helpsteer3(data): diff --git a/nemo_reinforcer/data/hf_datasets/oasst.py b/nemo_rl/data/hf_datasets/oasst.py similarity index 98% rename from nemo_reinforcer/data/hf_datasets/oasst.py rename to nemo_rl/data/hf_datasets/oasst.py index 3d10c46e37..45307f8704 100644 --- a/nemo_reinforcer/data/hf_datasets/oasst.py +++ b/nemo_rl/data/hf_datasets/oasst.py @@ -21,7 +21,7 @@ from dataclasses import dataclass from typing import Optional -from nemo_reinforcer.data.interfaces import TaskDataSpec +from nemo_rl.data.interfaces import TaskDataSpec SYSTEM_PROMPT = "A chat between a curious user and an artificial intelligence assistant. 
The assistant gives helpful, detailed, and polite answers to the user's questions.\n\n" diff --git a/nemo_reinforcer/data/hf_datasets/openmathinstruct2.py b/nemo_rl/data/hf_datasets/openmathinstruct2.py similarity index 94% rename from nemo_reinforcer/data/hf_datasets/openmathinstruct2.py rename to nemo_rl/data/hf_datasets/openmathinstruct2.py index b5a0bfa1bc..3c1fc318b4 100644 --- a/nemo_reinforcer/data/hf_datasets/openmathinstruct2.py +++ b/nemo_rl/data/hf_datasets/openmathinstruct2.py @@ -16,7 +16,7 @@ from datasets import load_dataset from dataclasses import dataclass -from nemo_reinforcer.data.interfaces import TaskDataSpec +from nemo_rl.data.interfaces import TaskDataSpec def format_math(data): @@ -31,7 +31,7 @@ def format_math(data): "content": data["expected_answer"], }, ], - # For v0.1 release, reinforcer datasets require a task_name key such that user can map a task processor per unique task. + # For v0.1 release, nemo rl datasets require a task_name key such that user can map a task processor per unique task. "task_name": "math", } diff --git a/nemo_reinforcer/data/hf_datasets/prompt_response_dataset.py b/nemo_rl/data/hf_datasets/prompt_response_dataset.py similarity index 96% rename from nemo_reinforcer/data/hf_datasets/prompt_response_dataset.py rename to nemo_rl/data/hf_datasets/prompt_response_dataset.py index 928af4fdff..a8740527fb 100644 --- a/nemo_reinforcer/data/hf_datasets/prompt_response_dataset.py +++ b/nemo_rl/data/hf_datasets/prompt_response_dataset.py @@ -13,7 +13,7 @@ # limitations under the License. 
from datasets import load_dataset -from nemo_reinforcer.data.interfaces import TaskDataSpec +from nemo_rl.data.interfaces import TaskDataSpec class PromptResponseDataset: diff --git a/nemo_reinforcer/data/hf_datasets/squad.py b/nemo_rl/data/hf_datasets/squad.py similarity index 95% rename from nemo_reinforcer/data/hf_datasets/squad.py rename to nemo_rl/data/hf_datasets/squad.py index 3bc257c88a..4e406011bb 100644 --- a/nemo_reinforcer/data/hf_datasets/squad.py +++ b/nemo_rl/data/hf_datasets/squad.py @@ -15,7 +15,7 @@ from typing import Optional from datasets import load_dataset -from nemo_reinforcer.data.interfaces import TaskDataSpec +from nemo_rl.data.interfaces import TaskDataSpec def format_squad(data): diff --git a/nemo_reinforcer/data/interfaces.py b/nemo_rl/data/interfaces.py similarity index 100% rename from nemo_reinforcer/data/interfaces.py rename to nemo_rl/data/interfaces.py diff --git a/nemo_reinforcer/data/llm_message_utils.py b/nemo_rl/data/llm_message_utils.py similarity index 97% rename from nemo_reinforcer/data/llm_message_utils.py rename to nemo_rl/data/llm_message_utils.py index db908f7bc9..ec7ccdbed5 100644 --- a/nemo_reinforcer/data/llm_message_utils.py +++ b/nemo_rl/data/llm_message_utils.py @@ -16,12 +16,12 @@ import torch from datasets import Dataset -from nemo_reinforcer.data.interfaces import ( +from nemo_rl.data.interfaces import ( LLMMessageLogType, FlatMessagesType, TaskDataSpec, ) -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.batched_data_dict import BatchedDataDict def message_log_to_flat_messages( @@ -42,7 +42,7 @@ def message_log_to_flat_messages( Examples: ```{doctest} >>> import torch - >>> from nemo_reinforcer.data.llm_message_utils import message_log_to_flat_messages + >>> from nemo_rl.data.llm_message_utils import message_log_to_flat_messages >>> # Create a simple message log with two messages >>> message_log = [ ... 
{'role': 'user', 'content': 'Hello', 'token_ids': torch.tensor([1, 2, 3])}, @@ -223,8 +223,8 @@ def batched_message_log_to_flat_message( Examples: ```{doctest} >>> import torch - >>> from nemo_reinforcer.data.llm_message_utils import batched_message_log_to_flat_message - >>> from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict + >>> from nemo_rl.data.llm_message_utils import batched_message_log_to_flat_message + >>> from nemo_rl.distributed.batched_data_dict import BatchedDataDict >>> # Create a batch of two message logs with different lengths >>> message_log_batch = [ ... # First conversation diff --git a/nemo_reinforcer/distributed/__init__.py b/nemo_rl/distributed/__init__.py similarity index 100% rename from nemo_reinforcer/distributed/__init__.py rename to nemo_rl/distributed/__init__.py diff --git a/nemo_reinforcer/distributed/batched_data_dict.py b/nemo_rl/distributed/batched_data_dict.py similarity index 99% rename from nemo_reinforcer/distributed/batched_data_dict.py rename to nemo_rl/distributed/batched_data_dict.py index 8e00e3b13b..c86562a9f1 100644 --- a/nemo_reinforcer/distributed/batched_data_dict.py +++ b/nemo_rl/distributed/batched_data_dict.py @@ -18,7 +18,7 @@ import torch -from nemo_reinforcer.distributed.collectives import ( +from nemo_rl.distributed.collectives import ( rebalance_nd_tensor, gather_jagged_object_lists, ) @@ -161,7 +161,7 @@ def shard_by_batch_size( Examples: ```{doctest} - >>> from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict + >>> from nemo_rl.distributed.batched_data_dict import BatchedDataDict >>> # Create a batch of two message logs with different lengths >>> batch = BatchedDataDict({ ... 
'problem_id': [0, 0, 1, 1, 2, 2, 3, 3], diff --git a/nemo_reinforcer/distributed/collectives.py b/nemo_rl/distributed/collectives.py similarity index 100% rename from nemo_reinforcer/distributed/collectives.py rename to nemo_rl/distributed/collectives.py diff --git a/nemo_reinforcer/distributed/model_utils.py b/nemo_rl/distributed/model_utils.py similarity index 100% rename from nemo_reinforcer/distributed/model_utils.py rename to nemo_rl/distributed/model_utils.py diff --git a/nemo_reinforcer/distributed/virtual_cluster.py b/nemo_rl/distributed/virtual_cluster.py similarity index 99% rename from nemo_reinforcer/distributed/virtual_cluster.py rename to nemo_rl/distributed/virtual_cluster.py index 1213d8d897..944ddb442e 100644 --- a/nemo_reinforcer/distributed/virtual_cluster.py +++ b/nemo_rl/distributed/virtual_cluster.py @@ -41,10 +41,10 @@ class PY_EXECUTABLES: SYSTEM = sys.executable # TODO: Debug why run-to-run variance is so high with these options - # Use NeMo-Reinforcer direct dependencies. + # Use NeMo-RL direct dependencies. BASE = "uv run --locked" - # Use NeMo-Reinforcer direct dependencies and vllm. + # Use NeMo-RL direct dependencies and vllm. 
VLLM = "uv run --locked --extra vllm" diff --git a/nemo_reinforcer/distributed/worker_groups.py b/nemo_rl/distributed/worker_groups.py similarity index 98% rename from nemo_reinforcer/distributed/worker_groups.py rename to nemo_rl/distributed/worker_groups.py index 6e3642d842..c982718067 100644 --- a/nemo_reinforcer/distributed/worker_groups.py +++ b/nemo_rl/distributed/worker_groups.py @@ -22,9 +22,9 @@ from ray.util.placement_group import PlacementGroup from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy -from nemo_reinforcer.distributed.virtual_cluster import RayVirtualCluster -from nemo_reinforcer.distributed.batched_data_dict import SlicedDataDict -from nemo_reinforcer.utils.venvs import create_local_venv +from nemo_rl.distributed.virtual_cluster import RayVirtualCluster +from nemo_rl.distributed.batched_data_dict import SlicedDataDict +from nemo_rl.utils.venvs import create_local_venv @dataclass @@ -162,7 +162,7 @@ def __call__( # If the py_executable begins with uv it signals that we need to create a # local venv first and then replace the py_executable with the local venv's python. # The directory the venv will be created in is controlled by the env var - # REINFORCER_VENV_DIR and defaults to $GIT_ROOT/venvs/. + # NEMO_RL_VENV_DIR and defaults to $GIT_ROOT/venvs/. 
unwrapped_cls = worker_class.__ray_actor_class__ venv_python = create_local_venv( py_executable=options["runtime_env"]["py_executable"], diff --git a/nemo_reinforcer/environments/__init__.py b/nemo_rl/environments/__init__.py similarity index 100% rename from nemo_reinforcer/environments/__init__.py rename to nemo_rl/environments/__init__.py diff --git a/nemo_reinforcer/environments/games/sliding_puzzle.py b/nemo_rl/environments/games/sliding_puzzle.py similarity index 98% rename from nemo_reinforcer/environments/games/sliding_puzzle.py rename to nemo_rl/environments/games/sliding_puzzle.py index 0bb595bc0c..6a41f004c5 100644 --- a/nemo_reinforcer/environments/games/sliding_puzzle.py +++ b/nemo_rl/environments/games/sliding_puzzle.py @@ -18,13 +18,13 @@ import random import copy -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict -from nemo_reinforcer.data.interfaces import LLMMessageLogType -from nemo_reinforcer.environments.interfaces import ( +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.data.interfaces import LLMMessageLogType +from nemo_rl.environments.interfaces import ( EnvironmentInterface, EnvironmentReturn, ) -from nemo_reinforcer.distributed.virtual_cluster import PY_EXECUTABLES +from nemo_rl.distributed.virtual_cluster import PY_EXECUTABLES class SlidingPuzzleConfig(TypedDict): diff --git a/nemo_reinforcer/environments/interfaces.py b/nemo_rl/environments/interfaces.py similarity index 96% rename from nemo_reinforcer/environments/interfaces.py rename to nemo_rl/environments/interfaces.py index 46f42bc24f..447f8cc318 100644 --- a/nemo_reinforcer/environments/interfaces.py +++ b/nemo_rl/environments/interfaces.py @@ -16,8 +16,8 @@ from torch import Tensor -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict -from nemo_reinforcer.data.interfaces import LLMMessageLogType +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.data.interfaces import 
LLMMessageLogType class EnvironmentReturn(NamedTuple): diff --git a/nemo_reinforcer/environments/math_environment.py b/nemo_rl/environments/math_environment.py similarity index 95% rename from nemo_reinforcer/environments/math_environment.py rename to nemo_rl/environments/math_environment.py index cc61fdcb2c..e82cf36050 100644 --- a/nemo_reinforcer/environments/math_environment.py +++ b/nemo_rl/environments/math_environment.py @@ -18,16 +18,16 @@ import torch from math_verify import parse, verify -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict -from nemo_reinforcer.environments.interfaces import ( +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.environments.interfaces import ( EnvironmentInterface, EnvironmentReturn, ) -from nemo_reinforcer.environments.metrics import ( +from nemo_rl.environments.metrics import ( calculate_pass_rate_per_prompt, ) -from nemo_reinforcer.environments.utils import chunk_list_to_workers -from nemo_reinforcer.distributed.virtual_cluster import PY_EXECUTABLES +from nemo_rl.environments.utils import chunk_list_to_workers +from nemo_rl.distributed.virtual_cluster import PY_EXECUTABLES class MathEnvConfig(TypedDict): diff --git a/nemo_reinforcer/environments/metrics.py b/nemo_rl/environments/metrics.py similarity index 100% rename from nemo_reinforcer/environments/metrics.py rename to nemo_rl/environments/metrics.py diff --git a/nemo_reinforcer/environments/utils.py b/nemo_rl/environments/utils.py similarity index 96% rename from nemo_reinforcer/environments/utils.py rename to nemo_rl/environments/utils.py index 75c912cf26..d54a74efcf 100644 --- a/nemo_reinforcer/environments/utils.py +++ b/nemo_rl/environments/utils.py @@ -30,7 +30,7 @@ def chunk_list_to_workers(to_chunk: List[Any], num_workers: int) -> List[List[An Examples: ```{doctest} - >>> from nemo_reinforcer.environments.utils import chunk_list_to_workers + >>> from nemo_rl.environments.utils import chunk_list_to_workers >>> 
chunk_list_to_workers([1, 2, 3, 4, 5], 3) [[1, 2], [3, 4], [5]] ``` diff --git a/nemo_reinforcer/evals/__init__.py b/nemo_rl/evals/__init__.py similarity index 100% rename from nemo_reinforcer/evals/__init__.py rename to nemo_rl/evals/__init__.py diff --git a/nemo_reinforcer/evals/eval.py b/nemo_rl/evals/eval.py similarity index 90% rename from nemo_reinforcer/evals/eval.py rename to nemo_rl/evals/eval.py index d0c27044d8..217c4c125b 100644 --- a/nemo_reinforcer/evals/eval.py +++ b/nemo_rl/evals/eval.py @@ -19,14 +19,14 @@ from torch.utils.data import DataLoader from transformers import AutoTokenizer -from nemo_reinforcer.data import MathDataConfig -from nemo_reinforcer.data.datasets import AllTaskProcessedDataset, eval_collate_fn -from nemo_reinforcer.data.llm_message_utils import get_keys_from_message_log -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict -from nemo_reinforcer.distributed.virtual_cluster import ClusterConfig, RayVirtualCluster -from nemo_reinforcer.environments.math_environment import MathEnvConfig -from nemo_reinforcer.models.generation.interfaces import GenerationConfig -from nemo_reinforcer.models.generation.vllm import VllmGeneration +from nemo_rl.data import MathDataConfig +from nemo_rl.data.datasets import AllTaskProcessedDataset, eval_collate_fn +from nemo_rl.data.llm_message_utils import get_keys_from_message_log +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.virtual_cluster import ClusterConfig, RayVirtualCluster +from nemo_rl.environments.math_environment import MathEnvConfig +from nemo_rl.models.generation.interfaces import GenerationConfig +from nemo_rl.models.generation.vllm import VllmGeneration # =============================================================================== diff --git a/nemo_reinforcer/experience/__init__.py b/nemo_rl/experience/__init__.py similarity index 100% rename from nemo_reinforcer/experience/__init__.py rename to 
nemo_rl/experience/__init__.py diff --git a/nemo_reinforcer/experience/rollouts.py b/nemo_rl/experience/rollouts.py similarity index 98% rename from nemo_reinforcer/experience/rollouts.py rename to nemo_rl/experience/rollouts.py index f41661efaf..e781ab73a7 100644 --- a/nemo_reinforcer/experience/rollouts.py +++ b/nemo_rl/experience/rollouts.py @@ -20,21 +20,21 @@ from transformers import AutoTokenizer import ray -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict -from nemo_reinforcer.data.interfaces import ( +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.data.interfaces import ( DatumSpec, LLMMessageLogType, FlatMessagesType, ) -from nemo_reinforcer.data.llm_message_utils import ( +from nemo_rl.data.llm_message_utils import ( get_keys_from_message_log, batched_message_log_to_flat_message, ) -from nemo_reinforcer.models.generation.interfaces import ( +from nemo_rl.models.generation.interfaces import ( GenerationInterface, GenerationDatumSpec, ) -from nemo_reinforcer.environments.interfaces import ( +from nemo_rl.environments.interfaces import ( EnvironmentInterface, EnvironmentReturn, ) diff --git a/nemo_reinforcer/metrics/__init__.py b/nemo_rl/metrics/__init__.py similarity index 100% rename from nemo_reinforcer/metrics/__init__.py rename to nemo_rl/metrics/__init__.py diff --git a/nemo_reinforcer/metrics/metrics_utils.py b/nemo_rl/metrics/metrics_utils.py similarity index 100% rename from nemo_reinforcer/metrics/metrics_utils.py rename to nemo_rl/metrics/metrics_utils.py diff --git a/nemo_reinforcer/models/__init__.py b/nemo_rl/models/__init__.py similarity index 100% rename from nemo_reinforcer/models/__init__.py rename to nemo_rl/models/__init__.py diff --git a/nemo_reinforcer/models/dtensor/__init__.py b/nemo_rl/models/dtensor/__init__.py similarity index 100% rename from nemo_reinforcer/models/dtensor/__init__.py rename to nemo_rl/models/dtensor/__init__.py diff --git 
a/nemo_reinforcer/models/dtensor/parallelize.py b/nemo_rl/models/dtensor/parallelize.py similarity index 99% rename from nemo_reinforcer/models/dtensor/parallelize.py rename to nemo_rl/models/dtensor/parallelize.py index 59602aab79..8922f203c7 100644 --- a/nemo_reinforcer/models/dtensor/parallelize.py +++ b/nemo_rl/models/dtensor/parallelize.py @@ -33,7 +33,7 @@ from transformers.models.llama.modeling_llama import LlamaForCausalLM from typing import Union, List -from nemo_reinforcer.distributed.model_utils import from_parallel_logits_to_logprobs +from nemo_rl.distributed.model_utils import from_parallel_logits_to_logprobs from torch.distributed.device_mesh import DeviceMesh diff --git a/nemo_reinforcer/models/generation/__init__.py b/nemo_rl/models/generation/__init__.py similarity index 100% rename from nemo_reinforcer/models/generation/__init__.py rename to nemo_rl/models/generation/__init__.py diff --git a/nemo_reinforcer/models/generation/interfaces.py b/nemo_rl/models/generation/interfaces.py similarity index 97% rename from nemo_reinforcer/models/generation/interfaces.py rename to nemo_rl/models/generation/interfaces.py index 48ed8554d8..4d6a5ec4d7 100644 --- a/nemo_reinforcer/models/generation/interfaces.py +++ b/nemo_rl/models/generation/interfaces.py @@ -17,7 +17,7 @@ import torch from transformers import AutoTokenizer -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.batched_data_dict import BatchedDataDict def verify_right_padding( @@ -48,7 +48,7 @@ def verify_right_padding( assert pad_value is not None, ( "Tokenizer does not have a pad_token_id. \n" - "Please use the nemo_reinforcer.algorithms.utils.get_tokenizer(...) API which sets pad_token_id if absent." + "Please use the nemo_rl.algorithms.utils.get_tokenizer(...) API which sets pad_token_id if absent." 
) # Determine which type of data we're dealing with diff --git a/nemo_reinforcer/models/generation/vllm.py b/nemo_rl/models/generation/vllm.py similarity index 98% rename from nemo_reinforcer/models/generation/vllm.py rename to nemo_rl/models/generation/vllm.py index 1f72eb3df0..dee3040cb3 100644 --- a/nemo_reinforcer/models/generation/vllm.py +++ b/nemo_rl/models/generation/vllm.py @@ -19,26 +19,26 @@ import ray import torch -from nemo_reinforcer.models.generation.interfaces import ( +from nemo_rl.models.generation.interfaces import ( GenerationInterface, GenerationDatumSpec, GenerationOutputSpec, verify_right_padding, GenerationConfig, ) -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict -from nemo_reinforcer.distributed.virtual_cluster import ( +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.virtual_cluster import ( RayVirtualCluster, PY_EXECUTABLES, ) -from nemo_reinforcer.distributed.worker_groups import RayWorkerGroup, RayWorkerBuilder +from nemo_rl.distributed.worker_groups import RayWorkerGroup, RayWorkerBuilder class VllmSpecificArgs(TypedDict): tensor_parallel_size: int gpu_memory_utilization: float max_model_len: int - # Additional arguments for vLLM inserted by reinforcer based on the context of when vllm is used + # Additional arguments for vLLM inserted by nemo rl based on the context of when vllm is used skip_tokenizer_init: bool load_format: str @@ -191,11 +191,11 @@ def __init__( enable_prefix_caching=True, dtype="auto", seed=seed, - # Don't use cuda-graph by default as it leads to convergence issue (see https://github.com/NVIDIA/reinforcer/issues/186) + # Don't use cuda-graph by default as it leads to convergence issue (see https://github.com/NVIDIA/nemo-rl/issues/186) enforce_eager=True, max_model_len=self.cfg["vllm_cfg"]["max_model_len"], trust_remote_code=True, - worker_extension_cls="nemo_reinforcer.models.generation.vllm_backend.VllmInternalWorkerExtension", + 
worker_extension_cls="nemo_rl.models.generation.vllm_backend.VllmInternalWorkerExtension", enable_sleep_mode=True, disable_log_stats=True, **vllm_kwargs, diff --git a/nemo_reinforcer/models/generation/vllm_backend.py b/nemo_rl/models/generation/vllm_backend.py similarity index 97% rename from nemo_reinforcer/models/generation/vllm_backend.py rename to nemo_rl/models/generation/vllm_backend.py index 28cf9fbd2f..41498b5312 100644 --- a/nemo_reinforcer/models/generation/vllm_backend.py +++ b/nemo_rl/models/generation/vllm_backend.py @@ -24,7 +24,7 @@ class VllmInternalWorkerExtension: def report_device_id(self) -> str: - from nemo_reinforcer.utils.nvml import get_device_uuid + from nemo_rl.utils.nvml import get_device_uuid return get_device_uuid(self.device.index) diff --git a/nemo_reinforcer/models/huggingface/__init__.py b/nemo_rl/models/huggingface/__init__.py similarity index 100% rename from nemo_reinforcer/models/huggingface/__init__.py rename to nemo_rl/models/huggingface/__init__.py diff --git a/nemo_reinforcer/models/huggingface/common.py b/nemo_rl/models/huggingface/common.py similarity index 100% rename from nemo_reinforcer/models/huggingface/common.py rename to nemo_rl/models/huggingface/common.py diff --git a/nemo_reinforcer/models/interfaces.py b/nemo_rl/models/interfaces.py similarity index 90% rename from nemo_reinforcer/models/interfaces.py rename to nemo_rl/models/interfaces.py index cb87d805ee..f194363edc 100644 --- a/nemo_reinforcer/models/interfaces.py +++ b/nemo_rl/models/interfaces.py @@ -14,9 +14,9 @@ from abc import ABC, abstractmethod from typing import Any, Dict -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict -from nemo_reinforcer.algorithms.interfaces import LossFunction -from nemo_reinforcer.models.generation.interfaces import GenerationDatumSpec +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.algorithms.interfaces import LossFunction +from nemo_rl.models.generation.interfaces 
import GenerationDatumSpec class PolicyInterface(ABC): diff --git a/nemo_reinforcer/models/megatron/__init__.py b/nemo_rl/models/megatron/__init__.py similarity index 100% rename from nemo_reinforcer/models/megatron/__init__.py rename to nemo_rl/models/megatron/__init__.py diff --git a/nemo_reinforcer/models/megatron/common.py b/nemo_rl/models/megatron/common.py similarity index 100% rename from nemo_reinforcer/models/megatron/common.py rename to nemo_rl/models/megatron/common.py diff --git a/nemo_reinforcer/models/policy/__init__.py b/nemo_rl/models/policy/__init__.py similarity index 94% rename from nemo_reinforcer/models/policy/__init__.py rename to nemo_rl/models/policy/__init__.py index c83a8d0bf9..ca806c4675 100644 --- a/nemo_reinforcer/models/policy/__init__.py +++ b/nemo_rl/models/policy/__init__.py @@ -14,7 +14,7 @@ from typing import TypedDict, Optional, Union -from nemo_reinforcer.models.generation.interfaces import GenerationConfig +from nemo_rl.models.generation.interfaces import GenerationConfig class DTensorConfig(TypedDict): diff --git a/nemo_reinforcer/models/policy/dtensor_policy_worker.py b/nemo_rl/models/policy/dtensor_policy_worker.py similarity index 97% rename from nemo_reinforcer/models/policy/dtensor_policy_worker.py rename to nemo_rl/models/policy/dtensor_policy_worker.py index 0c75590d0a..39e0dd4974 100644 --- a/nemo_reinforcer/models/policy/dtensor_policy_worker.py +++ b/nemo_rl/models/policy/dtensor_policy_worker.py @@ -26,29 +26,29 @@ ) from transformers import AutoModelForCausalLM, AutoTokenizer from transformers.integrations.accelerate import find_tied_parameters -from nemo_reinforcer.models.dtensor.parallelize import _parallelize_model +from nemo_rl.models.dtensor.parallelize import _parallelize_model -from nemo_reinforcer.algorithms.interfaces import LossFunction -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict -from nemo_reinforcer.models.policy import PolicyConfig -from 
nemo_reinforcer.models.policy.utils import import_class_from_path -from nemo_reinforcer.distributed.virtual_cluster import ( +from nemo_rl.algorithms.interfaces import LossFunction +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.models.policy import PolicyConfig +from nemo_rl.models.policy.utils import import_class_from_path +from nemo_rl.distributed.virtual_cluster import ( PY_EXECUTABLES, ) from typing import Iterable, Tuple, Union from torch.distributed.tensor import DTensor -from nemo_reinforcer.models.dtensor.parallelize import ( +from nemo_rl.models.dtensor.parallelize import ( get_logprobs_from_vocab_parallel_logits, get_grad_norm, clip_grad_by_total_norm_, to_local_if_dtensor, ) -from nemo_reinforcer.utils.native_checkpoint import ( +from nemo_rl.utils.native_checkpoint import ( save_checkpoint, load_checkpoint, ) from torch import nn -from nemo_reinforcer.models.policy.utils import get_gpu_info +from nemo_rl.models.policy.utils import get_gpu_info @contextmanager @@ -140,7 +140,7 @@ def __init__( self.model = AutoModelForCausalLM.from_pretrained( model_name, device_map="cpu", # load weights onto CPU initially - torch_dtype=torch.float32, # use full precision in sft until https://github.com/NVIDIA/reinforcer/issues/13 is fixed + torch_dtype=torch.float32, # use full precision in sft until https://github.com/NVIDIA/nemo-rl/issues/13 is fixed ) self.tokenizer = tokenizer @@ -264,7 +264,7 @@ def train( and not skip_tie_check ): raise ValueError( - f"Using dtensor policy with tp size {self.cfg['dtensor_cfg']['tensor_parallel_size']} for model ({self.cfg['model_name']}) that has tied weights (num_tied_weights={num_tied_weights}) is not supported (https://github.com/NVIDIA/reinforcer/issues/227). Please use dtensor policy with tensor parallel == 1 instead." 
+ f"Using dtensor policy with tp size {self.cfg['dtensor_cfg']['tensor_parallel_size']} for model ({self.cfg['model_name']}) that has tied weights (num_tied_weights={num_tied_weights}) is not supported (https://github.com/NVIDIA/nemo-rl/issues/227). Please use dtensor policy with tensor parallel == 1 instead." ) if gbs is None: @@ -566,7 +566,7 @@ def report_device_id(self) -> str: Returns: str: UUID of the device in the format "GPU-xxxxx" """ - from nemo_reinforcer.utils.nvml import get_device_uuid + from nemo_rl.utils.nvml import get_device_uuid # Get current device index from torch device_idx = torch.cuda.current_device() diff --git a/nemo_reinforcer/models/policy/fsdp1_policy_worker.py b/nemo_rl/models/policy/fsdp1_policy_worker.py similarity index 97% rename from nemo_reinforcer/models/policy/fsdp1_policy_worker.py rename to nemo_rl/models/policy/fsdp1_policy_worker.py index 65f01e500e..b49c2748cf 100644 --- a/nemo_reinforcer/models/policy/fsdp1_policy_worker.py +++ b/nemo_rl/models/policy/fsdp1_policy_worker.py @@ -30,9 +30,9 @@ from torch.distributed.fsdp.wrap import size_based_auto_wrap_policy from transformers import AutoModelForCausalLM -from nemo_reinforcer.algorithms.interfaces import LossFunction -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict -from nemo_reinforcer.models.generation.interfaces import ( +from nemo_rl.algorithms.interfaces import LossFunction +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.models.generation.interfaces import ( GenerationDatumSpec, GenerationOutputSpec, verify_right_padding, @@ -40,16 +40,16 @@ from transformers import AutoModelForCausalLM, AutoTokenizer from transformers.integrations.accelerate import find_tied_parameters -from nemo_reinforcer.models.policy import PolicyConfig -from nemo_reinforcer.models.policy.utils import import_class_from_path -from nemo_reinforcer.distributed.virtual_cluster import ( +from nemo_rl.models.policy import PolicyConfig +from 
nemo_rl.models.policy.utils import import_class_from_path +from nemo_rl.distributed.virtual_cluster import ( PY_EXECUTABLES, ) -from nemo_reinforcer.utils.native_checkpoint import ( +from nemo_rl.utils.native_checkpoint import ( save_checkpoint, load_checkpoint, ) -from nemo_reinforcer.models.policy.utils import get_gpu_info +from nemo_rl.models.policy.utils import get_gpu_info @ray.remote @@ -92,14 +92,14 @@ def __init__( self.model = AutoModelForCausalLM.from_pretrained( model_name, device_map="cpu", # load weights onto CPU initially - torch_dtype=torch.float32, # use full precision in sft until https://github.com/NVIDIA/reinforcer/issues/13 is fixed + torch_dtype=torch.float32, # use full precision in sft until https://github.com/NVIDIA/nemo-rl/issues/13 is fixed ) if init_reference_model: self.reference_model = AutoModelForCausalLM.from_pretrained( model_name, device_map="cpu", # load weights onto CPU initially - torch_dtype=torch.float32, # use full precision in sft until https://github.com/NVIDIA/reinforcer/issues/13 is fixed + torch_dtype=torch.float32, # use full precision in sft until https://github.com/NVIDIA/nemo-rl/issues/13 is fixed ) else: self.reference_model = None @@ -233,7 +233,7 @@ def train( skip_tie_check = os.environ.get("NRL_SKIP_TIED_WEIGHT_CHECK") if num_tied_weights != 0 and not skip_tie_check: raise ValueError( - f"Using FSP1 with a model ({self.cfg['model_name']}) that has tied weights (num_tied_weights={num_tied_weights}) is not supported (https://github.com/NVIDIA/reinforcer/issues/227). Please use dtensor policy with tensor parallel == 1 instead." + f"Using FSP1 with a model ({self.cfg['model_name']}) that has tied weights (num_tied_weights={num_tied_weights}) is not supported (https://github.com/NVIDIA/nemo-rl/issues/227). Please use dtensor policy with tensor parallel == 1 instead." 
) if gbs is None: @@ -741,7 +741,7 @@ def report_device_id(self) -> str: Returns: str: UUID of the device in the format "GPU-xxxxx" """ - from nemo_reinforcer.utils.nvml import get_device_uuid + from nemo_rl.utils.nvml import get_device_uuid # Get current device index from torch device_idx = torch.cuda.current_device() diff --git a/nemo_reinforcer/models/policy/hf_policy.py b/nemo_rl/models/policy/hf_policy.py similarity index 95% rename from nemo_reinforcer/models/policy/hf_policy.py rename to nemo_rl/models/policy/hf_policy.py index 0b56b002e7..b092d7eda3 100644 --- a/nemo_reinforcer/models/policy/hf_policy.py +++ b/nemo_rl/models/policy/hf_policy.py @@ -18,19 +18,19 @@ import ray from transformers import AutoTokenizer -from nemo_reinforcer.algorithms.interfaces import LossFunction -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict -from nemo_reinforcer.distributed.virtual_cluster import RayVirtualCluster -from nemo_reinforcer.distributed.worker_groups import RayWorkerBuilder, RayWorkerGroup -from nemo_reinforcer.models.generation.interfaces import ( +from nemo_rl.algorithms.interfaces import LossFunction +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.virtual_cluster import RayVirtualCluster +from nemo_rl.distributed.worker_groups import RayWorkerBuilder, RayWorkerGroup +from nemo_rl.models.generation.interfaces import ( GenerationInterface, GenerationDatumSpec, GenerationOutputSpec, ) -from nemo_reinforcer.models.interfaces import PolicyInterface -from nemo_reinforcer.models.policy import PolicyConfig -from nemo_reinforcer.models.policy.fsdp1_policy_worker import FSDP1PolicyWorker -from nemo_reinforcer.models.policy.dtensor_policy_worker import DTensorPolicyWorker +from nemo_rl.models.interfaces import PolicyInterface +from nemo_rl.models.policy import PolicyConfig +from nemo_rl.models.policy.fsdp1_policy_worker import FSDP1PolicyWorker +from nemo_rl.models.policy.dtensor_policy_worker import 
DTensorPolicyWorker class HfPolicy(PolicyInterface, GenerationInterface): diff --git a/nemo_reinforcer/models/policy/utils.py b/nemo_rl/models/policy/utils.py similarity index 100% rename from nemo_reinforcer/models/policy/utils.py rename to nemo_rl/models/policy/utils.py diff --git a/nemo_reinforcer/package_info.py b/nemo_rl/package_info.py similarity index 84% rename from nemo_reinforcer/package_info.py rename to nemo_rl/package_info.py index 8ca9d4a97b..2e362225a1 100644 --- a/nemo_reinforcer/package_info.py +++ b/nemo_rl/package_info.py @@ -24,12 +24,12 @@ __shortversion__ = ".".join(map(str, VERSION[:3])) __version__ = ".".join(map(str, VERSION[:3])) + "".join(VERSION[3:]) -__package_name__ = "nemo_reinforcer" +__package_name__ = "nemo_rl" __contact_names__ = "NVIDIA" __contact_emails__ = "nemo-tookit@nvidia.com" __homepage__ = "https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/" -__repository_url__ = "https://github.com/NVIDIA/reinforcer" -__download_url__ = "https://github.com/NVIDIA/reinforcer/releases" -__description__ = "NeMo-Reinforcer - a toolkit for model alignment" +__repository_url__ = "https://github.com/NVIDIA/nemo-rl" +__download_url__ = "https://github.com/NVIDIA/nemo-rl/releases" +__description__ = "NeMo-RL - a toolkit for model alignment" __license__ = "Apache2" __keywords__ = "deep learning, machine learning, gpu, NLP, NeMo, nvidia, pytorch, torch, language, reinforcement learning, RLHF, preference modeling, SteerLM, DPO" diff --git a/nemo_reinforcer/utils/__init__.py b/nemo_rl/utils/__init__.py similarity index 100% rename from nemo_reinforcer/utils/__init__.py rename to nemo_rl/utils/__init__.py diff --git a/nemo_reinforcer/utils/checkpoint.py b/nemo_rl/utils/checkpoint.py similarity index 100% rename from nemo_reinforcer/utils/checkpoint.py rename to nemo_rl/utils/checkpoint.py diff --git a/nemo_reinforcer/utils/config.py b/nemo_rl/utils/config.py similarity index 100% rename from nemo_reinforcer/utils/config.py rename to 
nemo_rl/utils/config.py diff --git a/nemo_reinforcer/utils/logger.py b/nemo_rl/utils/logger.py similarity index 98% rename from nemo_reinforcer/utils/logger.py rename to nemo_rl/utils/logger.py index 4df96172f9..c48dfc772b 100644 --- a/nemo_reinforcer/utils/logger.py +++ b/nemo_rl/utils/logger.py @@ -30,8 +30,8 @@ from rich.logging import RichHandler import torch -from nemo_reinforcer.data.interfaces import LLMMessageLogType -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.data.interfaces import LLMMessageLogType +from nemo_rl.distributed.batched_data_dict import BatchedDataDict from torch.utils.tensorboard import SummaryWriter import ray @@ -224,7 +224,7 @@ def start(self): """Start the GPU monitoring thread.""" if not ray.is_initialized(): raise ValueError( - "Ray must be initialized with nemo_reinforcer.distributed.virtual_cluster.init_ray() before the GPU logging can begin." + "Ray must be initialized with nemo_rl.distributed.virtual_cluster.init_ray() before the GPU logging can begin." 
) if self.is_running: @@ -618,7 +618,7 @@ def flatten_dict(d: Dict[str, Any], sep: str = ".") -> Dict[str, Any]: Examples: ```{doctest} - >>> from nemo_reinforcer.utils.logger import flatten_dict + >>> from nemo_rl.utils.logger import flatten_dict >>> flatten_dict({"a": 1, "b": {"c": 2}}) {'a': 1, 'b.c': 2} diff --git a/nemo_reinforcer/utils/native_checkpoint.py b/nemo_rl/utils/native_checkpoint.py similarity index 99% rename from nemo_reinforcer/utils/native_checkpoint.py rename to nemo_rl/utils/native_checkpoint.py index 39b84b8fe6..04d590e133 100644 --- a/nemo_reinforcer/utils/native_checkpoint.py +++ b/nemo_rl/utils/native_checkpoint.py @@ -269,7 +269,7 @@ def convert_dcp_to_hf( config.save_pretrained(hf_ckpt_path) # TODO: After the following PR gets merged: - # https://github.com/NVIDIA/reinforcer/pull/148/files + # https://github.com/NVIDIA/nemo-rl/pull/148/files # tokenizer should be copied from policy/tokenizer/* instead of relying on the model name # We can expose a arg at the top level --tokenizer_path to plumb that through. 
# This is more stable than relying on the current NeMo-RL get_tokenizer() which can diff --git a/nemo_reinforcer/utils/nvml.py b/nemo_rl/utils/nvml.py similarity index 100% rename from nemo_reinforcer/utils/nvml.py rename to nemo_rl/utils/nvml.py diff --git a/nemo_reinforcer/utils/timer.py b/nemo_rl/utils/timer.py similarity index 100% rename from nemo_reinforcer/utils/timer.py rename to nemo_rl/utils/timer.py diff --git a/nemo_reinforcer/utils/venvs.py b/nemo_rl/utils/venvs.py similarity index 89% rename from nemo_reinforcer/utils/venvs.py rename to nemo_rl/utils/venvs.py index e1cae61e0f..be34a06c8f 100644 --- a/nemo_reinforcer/utils/venvs.py +++ b/nemo_rl/utils/venvs.py @@ -47,16 +47,16 @@ def create_local_venv(py_executable: str, venv_name: str) -> str: # cluster with non-shared filesystems), you may encounter errors when workers # try to access the virtual environments # - # You can override this location by setting the REINFORCER_VENV_DIR environment variable + # You can override this location by setting the NEMO_RL_VENV_DIR environment variable - REINFORCER_VENV_DIR = os.environ.get("REINFORCER_VENV_DIR", DEFAULT_VENV_DIR) - logger.info(f"REINFORCER_VENV_DIR is set to {REINFORCER_VENV_DIR}.") + NEMO_RL_VENV_DIR = os.environ.get("NEMO_RL_VENV_DIR", DEFAULT_VENV_DIR) + logger.info(f"NEMO_RL_VENV_DIR is set to {NEMO_RL_VENV_DIR}.") # Create the venv directory if it doesn't exist - os.makedirs(REINFORCER_VENV_DIR, exist_ok=True) + os.makedirs(NEMO_RL_VENV_DIR, exist_ok=True) # Full path to the virtual environment - venv_path = os.path.join(REINFORCER_VENV_DIR, venv_name) + venv_path = os.path.join(NEMO_RL_VENV_DIR, venv_name) # Create the virtual environment uv_venv_cmd = ["uv", "venv", "--allow-existing", venv_path] diff --git a/pyproject.toml b/pyproject.toml index 83c8f86ab7..7da068da54 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,12 +3,12 @@ requires = ["setuptools>=42", "wheel"] build-backend = "setuptools.build_meta" [project] -name = 
"nemo-reinforcer" +name = "nemo-rl" dynamic = [ "version", "readme", ] -description = "Nemo-Reinforcer: A Scalable and Efficient Post-Training Library for Models Ranging from 1 GPU to 1000s, and from Tiny to >100B Parameters" +description = "Nemo-RL: A Scalable and Efficient Post-Training Library for Models Ranging from 1 GPU to 1000s, and from Tiny to >100B Parameters" requires-python = ">=3.10" license = {text = "Apache 2.0"} dependencies = [ @@ -30,10 +30,10 @@ dependencies = [ ] [tool.setuptools] -packages = ["nemo_reinforcer"] +packages = ["nemo_rl"] [tool.setuptools.dynamic] -version = {attr = "nemo_reinforcer.__version__"} # any module attribute compatible with ast.literal_eval +version = {attr = "nemo_rl.__version__"} # any module attribute compatible with ast.literal_eval readme = {file = "README.md", content-type = "text/markdown"} [project.optional-dependencies] @@ -79,7 +79,7 @@ exclude = ''' ''' [tool.pytest.ini_options] -addopts = "--cov=nemo_reinforcer --cov-report=term --cov-report=json -s -rA -x" +addopts = "--cov=nemo_rl --cov-report=term --cov-report=json -s -rA -x" testpaths = ["tests"] python_files = "test_*.py" diff --git a/tests/unit/algorithms/test_dpo.py b/tests/unit/algorithms/test_dpo.py index fa924a745d..014d73f9ca 100644 --- a/tests/unit/algorithms/test_dpo.py +++ b/tests/unit/algorithms/test_dpo.py @@ -16,7 +16,7 @@ import torch from unittest.mock import MagicMock, patch -from nemo_reinforcer.algorithms.dpo import add_ref_logprobs_to_data +from nemo_rl.algorithms.dpo import add_ref_logprobs_to_data class MockPolicy: diff --git a/tests/unit/algorithms/test_grpo.py b/tests/unit/algorithms/test_grpo.py index da1a21244f..b219a3f8cd 100644 --- a/tests/unit/algorithms/test_grpo.py +++ b/tests/unit/algorithms/test_grpo.py @@ -16,10 +16,10 @@ import ray from typing import Dict, List, Tuple -from nemo_reinforcer.experience.rollouts import calculate_rewards -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict -from 
nemo_reinforcer.data.interfaces import DatumSpec, LLMMessageLogType -from nemo_reinforcer.environments.interfaces import ( +from nemo_rl.experience.rollouts import calculate_rewards +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.data.interfaces import DatumSpec, LLMMessageLogType +from nemo_rl.environments.interfaces import ( EnvironmentInterface, EnvironmentReturn, ) diff --git a/tests/unit/algorithms/test_loss_functions.py b/tests/unit/algorithms/test_loss_functions.py index aece7c9f91..d36d8c0b89 100644 --- a/tests/unit/algorithms/test_loss_functions.py +++ b/tests/unit/algorithms/test_loss_functions.py @@ -15,13 +15,13 @@ import torch import numpy as np -from nemo_reinforcer.algorithms.loss_functions import ( +from nemo_rl.algorithms.loss_functions import ( NLLLoss, ClippedPGLossFn, DPOLossFn, ) -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict -from nemo_reinforcer.algorithms.utils import ( +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.algorithms.utils import ( calculate_kl_penalty_joschu2020, masked_mean, ) diff --git a/tests/unit/algorithms/test_sft.py b/tests/unit/algorithms/test_sft.py index 737108115f..93ebdcd511 100644 --- a/tests/unit/algorithms/test_sft.py +++ b/tests/unit/algorithms/test_sft.py @@ -16,8 +16,8 @@ from unittest.mock import MagicMock import torch from torchdata.stateful_dataloader import StatefulDataLoader -from nemo_reinforcer.algorithms.sft import sft_train, _default_sft_save_state -from nemo_reinforcer.algorithms.loss_functions import NLLLoss +from nemo_rl.algorithms.sft import sft_train, _default_sft_save_state +from nemo_rl.algorithms.loss_functions import NLLLoss @pytest.fixture diff --git a/tests/unit/algorithms/test_utils.py b/tests/unit/algorithms/test_utils.py index c3cc381fe9..f2aa579320 100755 --- a/tests/unit/algorithms/test_utils.py +++ b/tests/unit/algorithms/test_utils.py @@ -15,8 +15,8 @@ import pytest from datetime import 
datetime from transformers import AutoTokenizer -from nemo_reinforcer.algorithms.utils import get_tokenizer -from nemo_reinforcer.data.hf_datasets.chat_templates import COMMON_CHAT_TEMPLATES +from nemo_rl.algorithms.utils import get_tokenizer +from nemo_rl.data.hf_datasets.chat_templates import COMMON_CHAT_TEMPLATES @pytest.fixture diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index e1cb3c4319..68df0f4c87 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -14,7 +14,7 @@ from io import StringIO import time import pytest -from nemo_reinforcer.utils.logger import GPUMonitoringConfig +from nemo_rl.utils.logger import GPUMonitoringConfig from tests import unit import torch import torch.distributed as dist @@ -33,7 +33,7 @@ from typing import Callable import ray import json -from nemo_reinforcer.distributed.virtual_cluster import init_ray +from nemo_rl.distributed.virtual_cluster import init_ray from typing import TypedDict from datetime import datetime import unittest.mock @@ -103,7 +103,7 @@ def pytest_sessionstart(session): start_time=datetime.now().strftime("%Y-%m-%d %H:%M:%S"), metrics={}, gpu_types=[], - coverage="[n/a] run with --cov=nemo_reinforcer", + coverage="[n/a] run with --cov=nemo_rl", ) @@ -128,7 +128,7 @@ def session_data(request, init_ray_cluster): ############################################################ # 2. Gather the ray metadata # ############################################################ - from nemo_reinforcer.utils.logger import RayGpuMonitorLogger + from nemo_rl.utils.logger import RayGpuMonitorLogger logger = RayGpuMonitorLogger( collection_interval=float("inf"), @@ -231,7 +231,7 @@ def ray_gpu_monitor(init_ray_cluster): This fixture doesn't need to be called directly. 
""" - from nemo_reinforcer.utils.logger import RayGpuMonitorLogger + from nemo_rl.utils.logger import RayGpuMonitorLogger gpu_monitor = RayGpuMonitorLogger( collection_interval=1, diff --git a/tests/unit/data/hf_datasets/test_dpo_dataset.py b/tests/unit/data/hf_datasets/test_dpo_dataset.py index 19d9d45ef6..12cf67aef8 100644 --- a/tests/unit/data/hf_datasets/test_dpo_dataset.py +++ b/tests/unit/data/hf_datasets/test_dpo_dataset.py @@ -18,7 +18,7 @@ import pytest from unittest.mock import patch, MagicMock -from nemo_reinforcer.data.hf_datasets.dpo import DPODataset +from nemo_rl.data.hf_datasets.dpo import DPODataset @pytest.fixture diff --git a/tests/unit/data/hf_datasets/test_helpsteer.py b/tests/unit/data/hf_datasets/test_helpsteer.py index 304fd5d2ad..5d297cfe77 100644 --- a/tests/unit/data/hf_datasets/test_helpsteer.py +++ b/tests/unit/data/hf_datasets/test_helpsteer.py @@ -13,7 +13,7 @@ # limitations under the License. import pytest -from nemo_reinforcer.data.hf_datasets.helpsteer3 import ( +from nemo_rl.data.hf_datasets.helpsteer3 import ( HelpSteer3Dataset, format_helpsteer3, ) diff --git a/tests/unit/data/hf_datasets/test_prompt_response.py b/tests/unit/data/hf_datasets/test_prompt_response.py index 12b98d7fb2..d0aeccc583 100644 --- a/tests/unit/data/hf_datasets/test_prompt_response.py +++ b/tests/unit/data/hf_datasets/test_prompt_response.py @@ -15,8 +15,8 @@ import pytest import tempfile import json -from nemo_reinforcer.data.hf_datasets.chat_templates import COMMON_CHAT_TEMPLATES -from nemo_reinforcer.data.hf_datasets.prompt_response_dataset import ( +from nemo_rl.data.hf_datasets.chat_templates import COMMON_CHAT_TEMPLATES +from nemo_rl.data.hf_datasets.prompt_response_dataset import ( PromptResponseDataset, ) from transformers import AutoTokenizer diff --git a/tests/unit/data/hf_datasets/test_squad.py b/tests/unit/data/hf_datasets/test_squad.py index 47511363b5..d959f694f8 100644 --- a/tests/unit/data/hf_datasets/test_squad.py +++ 
b/tests/unit/data/hf_datasets/test_squad.py @@ -14,8 +14,8 @@ import pytest from transformers import AutoTokenizer -from nemo_reinforcer.data.hf_datasets.chat_templates import COMMON_CHAT_TEMPLATES -from nemo_reinforcer.data.hf_datasets.squad import SquadDataset +from nemo_rl.data.hf_datasets.chat_templates import COMMON_CHAT_TEMPLATES +from nemo_rl.data.hf_datasets.squad import SquadDataset @pytest.mark.skip(reason="dataset download is flaky") diff --git a/tests/unit/data/test_data_processor.py b/tests/unit/data/test_data_processor.py index 2f32f117f0..39cd959dd6 100644 --- a/tests/unit/data/test_data_processor.py +++ b/tests/unit/data/test_data_processor.py @@ -21,10 +21,10 @@ sys.path.append("/".join(abspath.split("/")[:-4])) from examples.run_grpo_math import math_data_processor -from nemo_reinforcer.algorithms.utils import get_tokenizer -from nemo_reinforcer.data.datasets import AllTaskProcessedDataset -from nemo_reinforcer.data.interfaces import TaskDataSpec -from nemo_reinforcer.models.policy import TokenizerConfig +from nemo_rl.algorithms.utils import get_tokenizer +from nemo_rl.data.datasets import AllTaskProcessedDataset +from nemo_rl.data.interfaces import TaskDataSpec +from nemo_rl.models.policy import TokenizerConfig basic_tokenizer_test_config: TokenizerConfig = { diff --git a/tests/unit/data/test_datasets.py b/tests/unit/data/test_datasets.py index 7486e025c4..31cad8c82d 100755 --- a/tests/unit/data/test_datasets.py +++ b/tests/unit/data/test_datasets.py @@ -16,9 +16,9 @@ import torch from unittest.mock import MagicMock -from nemo_reinforcer.data.datasets import dpo_collate_fn -from nemo_reinforcer.data.interfaces import DatumSpec -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.data.datasets import dpo_collate_fn +from nemo_rl.data.interfaces import DatumSpec +from nemo_rl.distributed.batched_data_dict import BatchedDataDict def test_dpo_collate_fn(): diff --git a/tests/unit/data/test_llm_message_utils.py 
b/tests/unit/data/test_llm_message_utils.py index 2ff25beb1c..1a7ddc568c 100644 --- a/tests/unit/data/test_llm_message_utils.py +++ b/tests/unit/data/test_llm_message_utils.py @@ -17,7 +17,7 @@ from typing import Dict, List from transformers import AutoTokenizer -from nemo_reinforcer.data.llm_message_utils import ( +from nemo_rl.data.llm_message_utils import ( message_log_to_flat_messages, get_keys_from_message_log, batched_message_log_to_flat_message, @@ -25,7 +25,7 @@ add_loss_mask_to_message_log, get_first_index_that_differs, ) -from nemo_reinforcer.data.interfaces import LLMMessageLogType, TaskDataSpec +from nemo_rl.data.interfaces import LLMMessageLogType, TaskDataSpec @pytest.fixture diff --git a/tests/unit/distributed/test_batched_data_dict.py b/tests/unit/distributed/test_batched_data_dict.py index acda98b6c2..f95814eac4 100644 --- a/tests/unit/distributed/test_batched_data_dict.py +++ b/tests/unit/distributed/test_batched_data_dict.py @@ -13,7 +13,7 @@ # limitations under the License. 
import pytest import torch -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.batched_data_dict import BatchedDataDict def test_shard_by_batch_size_basic(): diff --git a/tests/unit/distributed/test_cluster_visualization.py b/tests/unit/distributed/test_cluster_visualization.py index 9253579cea..00243025e5 100644 --- a/tests/unit/distributed/test_cluster_visualization.py +++ b/tests/unit/distributed/test_cluster_visualization.py @@ -15,7 +15,7 @@ from unittest.mock import patch, MagicMock import pytest -from nemo_reinforcer.distributed.virtual_cluster import RayVirtualCluster +from nemo_rl.distributed.virtual_cluster import RayVirtualCluster @pytest.fixture(autouse=True) @@ -23,10 +23,10 @@ def mock_virtual_cluster_pg(): # Mock the _init_placement_groups and get_placement_groups methods to avoid actually initializing placement groups with ( patch( - "nemo_reinforcer.distributed.virtual_cluster.RayVirtualCluster.get_placement_groups" + "nemo_rl.distributed.virtual_cluster.RayVirtualCluster.get_placement_groups" ) as mock_get_pg, patch( - "nemo_reinforcer.distributed.virtual_cluster.RayVirtualCluster._init_placement_groups" + "nemo_rl.distributed.virtual_cluster.RayVirtualCluster._init_placement_groups" ) as mock_init_pg, ): mock_get_pg.return_value = [] diff --git a/tests/unit/distributed/test_collectives.py b/tests/unit/distributed/test_collectives.py index e8cd5c4f88..72b900cb73 100644 --- a/tests/unit/distributed/test_collectives.py +++ b/tests/unit/distributed/test_collectives.py @@ -13,7 +13,7 @@ # limitations under the License. 
import torch -from nemo_reinforcer.distributed.collectives import ( +from nemo_rl.distributed.collectives import ( rebalance_nd_tensor, gather_jagged_object_lists, ) diff --git a/tests/unit/distributed/test_virtual_cluster.py b/tests/unit/distributed/test_virtual_cluster.py index bdf24f3b18..8d26ed5a33 100644 --- a/tests/unit/distributed/test_virtual_cluster.py +++ b/tests/unit/distributed/test_virtual_cluster.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from nemo_reinforcer.distributed.virtual_cluster import ( +from nemo_rl.distributed.virtual_cluster import ( _get_node_ip_and_free_port, PY_EXECUTABLES, RayVirtualCluster, @@ -67,7 +67,7 @@ def test_env_max_retries_default_value(): with ( patch.dict(os.environ, {}, clear=True), patch( - "nemo_reinforcer.distributed.virtual_cluster.RayVirtualCluster._init_placement_groups" + "nemo_rl.distributed.virtual_cluster.RayVirtualCluster._init_placement_groups" ) as mock_init, ): # Mock successful initialization @@ -91,7 +91,7 @@ def test_env_max_retries_exhausted(): with ( patch.dict(os.environ, env_vars, clear=True), patch( - "nemo_reinforcer.distributed.virtual_cluster.RayVirtualCluster._init_placement_groups" + "nemo_rl.distributed.virtual_cluster.RayVirtualCluster._init_placement_groups" ) as mock_init, patch("time.sleep") as mock_sleep, ): diff --git a/tests/unit/environments/test_math_environment.py b/tests/unit/environments/test_math_environment.py index c26035ce15..4b2d4069cb 100644 --- a/tests/unit/environments/test_math_environment.py +++ b/tests/unit/environments/test_math_environment.py @@ -13,7 +13,7 @@ # limitations under the License. 
import pytest import ray -from nemo_reinforcer.environments.math_environment import MathEnvironment +from nemo_rl.environments.math_environment import MathEnvironment import time import os diff --git a/tests/unit/experience/test_rollouts.py b/tests/unit/experience/test_rollouts.py index a1e72fa6a7..cc71b366b4 100644 --- a/tests/unit/experience/test_rollouts.py +++ b/tests/unit/experience/test_rollouts.py @@ -20,12 +20,12 @@ from transformers import AutoTokenizer -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict -from nemo_reinforcer.distributed.virtual_cluster import RayVirtualCluster -from nemo_reinforcer.models.policy import PolicyConfig -from nemo_reinforcer.models.policy.hf_policy import HfPolicy -from nemo_reinforcer.models.generation.interfaces import configure_generation_config -from nemo_reinforcer.experience.rollouts import run_multi_turn_rollout +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.virtual_cluster import RayVirtualCluster +from nemo_rl.models.policy import PolicyConfig +from nemo_rl.models.policy.hf_policy import HfPolicy +from nemo_rl.models.generation.interfaces import configure_generation_config +from nemo_rl.experience.rollouts import run_multi_turn_rollout # Import the test environment definitions from tests.unit.test_envs import ( @@ -34,14 +34,15 @@ MultiStepCalcMetadata, ) -from nemo_reinforcer.environments.games.sliding_puzzle import ( +from nemo_rl.environments.games.sliding_puzzle import ( SlidingPuzzleGameLogic, SlidingPuzzleEnv, SlidingPuzzleConfig, SlidingPuzzleMetadata, ) -from nemo_reinforcer.models.generation.vllm import VllmConfig, VllmGeneration +from nemo_rl.models.generation.vllm import VllmConfig, VllmGeneration + MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct" diff --git a/tests/unit/models/generation/test_vllm_generation.py b/tests/unit/models/generation/test_vllm_generation.py index d584de146d..da454a9265 100644 --- 
a/tests/unit/models/generation/test_vllm_generation.py +++ b/tests/unit/models/generation/test_vllm_generation.py @@ -19,13 +19,13 @@ import ray import os -from nemo_reinforcer.algorithms.grpo import refit_policy_generation -from nemo_reinforcer.algorithms.utils import get_tokenizer -from nemo_reinforcer.distributed.virtual_cluster import RayVirtualCluster -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict -from nemo_reinforcer.models.generation.interfaces import configure_generation_config -from nemo_reinforcer.models.generation.vllm import VllmGeneration, VllmConfig -from nemo_reinforcer.models.policy import PolicyConfig +from nemo_rl.algorithms.grpo import refit_policy_generation +from nemo_rl.algorithms.utils import get_tokenizer +from nemo_rl.distributed.virtual_cluster import RayVirtualCluster +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.models.generation.interfaces import configure_generation_config +from nemo_rl.models.generation.vllm import VllmGeneration, VllmConfig +from nemo_rl.models.policy import PolicyConfig # Define basic vLLM test config basic_vllm_test_config: VllmConfig = { @@ -239,7 +239,7 @@ def test_vllm_worker_seed_behavior(cluster, tokenizer): 1. Different workers generate different outputs for identical prompts due to different seeds 2. 
When forced to use the same seed, workers generate identical outputs """ - from nemo_reinforcer.models.generation.vllm import VllmGenerationWorker + from nemo_rl.models.generation.vllm import VllmGenerationWorker unique_prompts = [ "Hello, my name is", @@ -277,7 +277,7 @@ def test_vllm_worker_seed_behavior(cluster, tokenizer): policy = VllmGeneration(cluster, vllm_config) policy.finish_generation() - from nemo_reinforcer.models.policy.hf_policy import HfPolicy + from nemo_rl.models.policy.hf_policy import HfPolicy hf_config = get_basic_hf_test_config(enable_dtensor=False) hf_policy = HfPolicy(cluster, hf_config, tokenizer) @@ -382,7 +382,7 @@ def test_vllm_generation_with_hf_training(cluster, tokenizer, enable_dtensor): This test validates that the two policies can work together. """ - from nemo_reinforcer.models.policy.hf_policy import HfPolicy + from nemo_rl.models.policy.hf_policy import HfPolicy from tests.unit.test_utils import nll_loss # Create separate configs for each policy @@ -666,7 +666,7 @@ def test_vllm_weight_update_and_prefix_cache_reset( cluster, tokenizer, tensor_parallel_size, enable_dtensor ): """Test that the vLLM prefix cache is correctly reset when weights change.""" - from nemo_reinforcer.models.policy.hf_policy import HfPolicy + from nemo_rl.models.policy.hf_policy import HfPolicy # Create configs vllm_config = deepcopy(basic_vllm_test_config) @@ -765,7 +765,7 @@ def test_vllm_weight_update_and_prefix_cache_reset( @pytest.mark.parametrize("enable_dtensor", [True, False]) def test_vllm_weight_update_memory(cluster, tokenizer, enable_dtensor): """Test that vLLM streaming weight update and can save memory.""" - from nemo_reinforcer.models.policy.hf_policy import HfPolicy + from nemo_rl.models.policy.hf_policy import HfPolicy if cluster.num_gpus_per_node < 2: pytest.skip("Need at least 2 GPUs per node for this test") @@ -831,7 +831,7 @@ def test_vllm_generation_with_stop( cluster, test_input_data, tokenizer, is_eval, enable_dtensor ): """Test 
vLLM generation with stop.""" - from nemo_reinforcer.models.policy.hf_policy import HfPolicy + from nemo_rl.models.policy.hf_policy import HfPolicy # Create separate configs for each policy vllm_config = basic_vllm_test_config.copy() diff --git a/tests/unit/models/policy/test_dtensor_worker.py b/tests/unit/models/policy/test_dtensor_worker.py index 50c8d146cc..9472abeb3a 100644 --- a/tests/unit/models/policy/test_dtensor_worker.py +++ b/tests/unit/models/policy/test_dtensor_worker.py @@ -20,14 +20,14 @@ # Define a custom marker for model configuration tests pytestmark = pytest.mark.modelconfig -from nemo_reinforcer.algorithms.interfaces import LossFunction -from nemo_reinforcer.algorithms.utils import get_tokenizer -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict -from nemo_reinforcer.distributed.virtual_cluster import RayVirtualCluster -from nemo_reinforcer.models.generation.interfaces import configure_generation_config -from nemo_reinforcer.models.policy import PolicyConfig -from nemo_reinforcer.models.policy.hf_policy import HfPolicy -from nemo_reinforcer.models.policy.dtensor_policy_worker import DTensorPolicyWorker +from nemo_rl.algorithms.interfaces import LossFunction +from nemo_rl.algorithms.utils import get_tokenizer +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.virtual_cluster import RayVirtualCluster +from nemo_rl.models.generation.interfaces import configure_generation_config +from nemo_rl.models.policy import PolicyConfig +from nemo_rl.models.policy.hf_policy import HfPolicy +from nemo_rl.models.policy.dtensor_policy_worker import DTensorPolicyWorker from tests.unit.test_utils import simple_loss from tests.unit.conftest import TEST_ASSETS from transformers import AutoModelForCausalLM diff --git a/tests/unit/models/policy/test_fsdp1_worker.py b/tests/unit/models/policy/test_fsdp1_worker.py index 2d30c90612..c4fa020bae 100644 --- a/tests/unit/models/policy/test_fsdp1_worker.py +++ 
b/tests/unit/models/policy/test_fsdp1_worker.py @@ -19,13 +19,13 @@ import os from copy import deepcopy -from nemo_reinforcer.algorithms.interfaces import LossFunction -from nemo_reinforcer.algorithms.utils import get_tokenizer -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict -from nemo_reinforcer.distributed.virtual_cluster import RayVirtualCluster -from nemo_reinforcer.models.generation.interfaces import configure_generation_config -from nemo_reinforcer.models.policy import PolicyConfig -from nemo_reinforcer.models.policy.hf_policy import HfPolicy +from nemo_rl.algorithms.interfaces import LossFunction +from nemo_rl.algorithms.utils import get_tokenizer +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.virtual_cluster import RayVirtualCluster +from nemo_rl.models.generation.interfaces import configure_generation_config +from nemo_rl.models.policy import PolicyConfig +from nemo_rl.models.policy.hf_policy import HfPolicy from tests.unit.test_utils import simple_loss, nll_loss diff --git a/tests/unit/test_envs.py b/tests/unit/test_envs.py index 5fe62f135b..2e139f272a 100644 --- a/tests/unit/test_envs.py +++ b/tests/unit/test_envs.py @@ -16,13 +16,13 @@ import torch from typing import Dict, List, Tuple, Optional, TypedDict, Literal, Any -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict -from nemo_reinforcer.data.interfaces import LLMMessageLogType -from nemo_reinforcer.environments.interfaces import ( +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.data.interfaces import LLMMessageLogType +from nemo_rl.environments.interfaces import ( EnvironmentInterface, EnvironmentReturn, ) -from nemo_reinforcer.distributed.virtual_cluster import PY_EXECUTABLES +from nemo_rl.distributed.virtual_cluster import PY_EXECUTABLES class MultiStepCalcMetadata(TypedDict): diff --git a/tests/unit/test_meta.py b/tests/unit/test_meta.py index 4b9e5bcc23..8b43250dec 
100644 --- a/tests/unit/test_meta.py +++ b/tests/unit/test_meta.py @@ -19,10 +19,10 @@ def test_usage_stats_disabled_by_default(): - import nemo_reinforcer + import nemo_rl assert os.environ["RAY_USAGE_STATS_ENABLED"] == "0", ( - f"Our dockerfile, slurm submission script and default environment setting when importing reinforcer should all disable usage stats collection. This failing is not expected." + f"Our dockerfile, slurm submission script and default environment setting when importing NeMo RL should all disable usage stats collection. This failing is not expected." ) @@ -30,5 +30,5 @@ def test_usage_stats_disabled_in_tests(): import tests assert os.environ["RAY_USAGE_STATS_ENABLED"] == "0", ( - f"Our dockerfile, slurm submission script and default environment setting when importing reinforcer should all disable usage stats collection. This failing is not expected." + f"Our dockerfile, slurm submission script and default environment setting when importing NeMo RL should all disable usage stats collection. This failing is not expected." 
) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 9972c1a1b6..2d3318a053 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -14,7 +14,7 @@ from typing import Any, Dict, Tuple import torch -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.batched_data_dict import BatchedDataDict def simple_loss( diff --git a/tests/unit/utils/test_checkpoint.py b/tests/unit/utils/test_checkpoint.py index fe8f2aac67..dc6ad0dd5d 100644 --- a/tests/unit/utils/test_checkpoint.py +++ b/tests/unit/utils/test_checkpoint.py @@ -16,7 +16,7 @@ import torch import numpy as np from pathlib import Path -from nemo_reinforcer.utils.checkpoint import CheckpointManager +from nemo_rl.utils.checkpoint import CheckpointManager @pytest.fixture diff --git a/tests/unit/utils/test_config.py b/tests/unit/utils/test_config.py index b8aa5328d2..7c893befa3 100644 --- a/tests/unit/utils/test_config.py +++ b/tests/unit/utils/test_config.py @@ -16,7 +16,7 @@ import pytest -from nemo_reinforcer.utils.config import load_config +from nemo_rl.utils.config import load_config @pytest.fixture @@ -202,7 +202,7 @@ def test_parse_hydra_overrides(): """Test parsing and applying Hydra overrides.""" from omegaconf import OmegaConf - from nemo_reinforcer.utils.config import OverridesError, parse_hydra_overrides + from nemo_rl.utils.config import OverridesError, parse_hydra_overrides # Create initial config cfg = OmegaConf.create( diff --git a/tests/unit/utils/test_logger.py b/tests/unit/utils/test_logger.py index ac2ce9cc43..ea88a69c7f 100644 --- a/tests/unit/utils/test_logger.py +++ b/tests/unit/utils/test_logger.py @@ -18,7 +18,7 @@ import pytest -from nemo_reinforcer.utils.logger import ( +from nemo_rl.utils.logger import ( Logger, TensorboardLogger, WandbLogger, @@ -62,7 +62,7 @@ def temp_dir(self): yield temp_dir shutil.rmtree(temp_dir) - @patch("nemo_reinforcer.utils.logger.SummaryWriter") + 
@patch("nemo_rl.utils.logger.SummaryWriter") def test_init(self, mock_summary_writer, temp_dir): """Test initialization of TensorboardLogger.""" cfg = {"log_dir": temp_dir} @@ -71,7 +71,7 @@ def test_init(self, mock_summary_writer, temp_dir): # The log_dir is passed to SummaryWriter but not stored as an attribute mock_summary_writer.assert_called_once_with(log_dir=temp_dir) - @patch("nemo_reinforcer.utils.logger.SummaryWriter") + @patch("nemo_rl.utils.logger.SummaryWriter") def test_log_metrics(self, mock_summary_writer, temp_dir): """Test logging metrics to TensorboardLogger.""" cfg = {"log_dir": temp_dir} @@ -87,7 +87,7 @@ def test_log_metrics(self, mock_summary_writer, temp_dir): mock_writer.add_scalar.assert_any_call("loss", 0.5, 10) mock_writer.add_scalar.assert_any_call("accuracy", 0.8, 10) - @patch("nemo_reinforcer.utils.logger.SummaryWriter") + @patch("nemo_rl.utils.logger.SummaryWriter") def test_log_metrics_with_prefix(self, mock_summary_writer, temp_dir): """Test logging metrics with a prefix to TensorboardLogger.""" cfg = {"log_dir": temp_dir} @@ -104,7 +104,7 @@ def test_log_metrics_with_prefix(self, mock_summary_writer, temp_dir): mock_writer.add_scalar.assert_any_call("train/loss", 0.5, 10) mock_writer.add_scalar.assert_any_call("train/accuracy", 0.8, 10) - @patch("nemo_reinforcer.utils.logger.SummaryWriter") + @patch("nemo_rl.utils.logger.SummaryWriter") def test_log_hyperparams(self, mock_summary_writer, temp_dir): """Test logging hyperparameters to TensorboardLogger.""" cfg = {"log_dir": temp_dir} @@ -135,7 +135,7 @@ def temp_dir(self): yield temp_dir shutil.rmtree(temp_dir) - @patch("nemo_reinforcer.utils.logger.wandb") + @patch("nemo_rl.utils.logger.wandb") def test_init_custom_config(self, mock_wandb, temp_dir): """Test initialization of WandbLogger with custom config.""" cfg = { @@ -156,7 +156,7 @@ def test_init_custom_config(self, mock_wandb, temp_dir): dir=temp_dir, ) - @patch("nemo_reinforcer.utils.logger.wandb") + 
@patch("nemo_rl.utils.logger.wandb") def test_log_metrics(self, mock_wandb): """Test logging metrics to WandbLogger.""" cfg = {} @@ -170,7 +170,7 @@ def test_log_metrics(self, mock_wandb): mock_run = mock_wandb.init.return_value mock_run.log.assert_called_once_with(metrics, step=step) - @patch("nemo_reinforcer.utils.logger.wandb") + @patch("nemo_rl.utils.logger.wandb") def test_log_metrics_with_prefix(self, mock_wandb): """Test logging metrics with a prefix to WandbLogger.""" cfg = {} @@ -186,7 +186,7 @@ def test_log_metrics_with_prefix(self, mock_wandb): expected_metrics = {"train/loss": 0.5, "train/accuracy": 0.8} mock_run.log.assert_called_once_with(expected_metrics, step=step) - @patch("nemo_reinforcer.utils.logger.wandb") + @patch("nemo_rl.utils.logger.wandb") def test_log_metrics_with_step_metric(self, mock_wandb): """Test logging metrics with a step metric to WandbLogger.""" cfg = {} @@ -206,7 +206,7 @@ def test_log_metrics_with_step_metric(self, mock_wandb): mock_run = mock_wandb.init.return_value mock_run.log.assert_called_once_with(metrics, commit=False) - @patch("nemo_reinforcer.utils.logger.wandb") + @patch("nemo_rl.utils.logger.wandb") def test_log_metrics_with_prefix_and_step_metric(self, mock_wandb): """Test logging metrics with both prefix and step metric.""" cfg = {} @@ -232,7 +232,7 @@ def test_log_metrics_with_prefix_and_step_metric(self, mock_wandb): } mock_run.log.assert_called_once_with(expected_metrics, commit=False) - @patch("nemo_reinforcer.utils.logger.wandb") + @patch("nemo_rl.utils.logger.wandb") def test_define_metric(self, mock_wandb): """Test defining a metric with a custom step metric.""" cfg = {} @@ -247,7 +247,7 @@ def test_define_metric(self, mock_wandb): "ray/*", step_metric="ray/ray_step" ) - @patch("nemo_reinforcer.utils.logger.wandb") + @patch("nemo_rl.utils.logger.wandb") def test_log_hyperparams(self, mock_wandb): """Test logging hyperparameters to WandbLogger.""" cfg = {} @@ -290,7 +290,7 @@ def log_metrics(self, metrics, 
step, prefix="", step_metric=None): return MockLogger() - @patch("nemo_reinforcer.utils.logger.ray") + @patch("nemo_rl.utils.logger.ray") def test_init(self, mock_ray): """Test initialization of RayGpuMonitorLogger.""" # Mock ray.is_initialized to return True @@ -315,8 +315,8 @@ def test_init(self, mock_ray): assert monitor.is_running is False assert monitor.collection_thread is None - @patch("nemo_reinforcer.utils.logger.ray") - @patch("nemo_reinforcer.utils.logger.threading.Thread") + @patch("nemo_rl.utils.logger.ray") + @patch("nemo_rl.utils.logger.threading.Thread") def test_start(self, mock_thread, mock_ray): """Test start method of RayGpuMonitorLogger.""" # Mock ray.is_initialized to return True @@ -342,7 +342,7 @@ def test_start(self, mock_thread, mock_ray): assert monitor.is_running is True assert monitor.collection_thread is mock_thread.return_value - @patch("nemo_reinforcer.utils.logger.ray") + @patch("nemo_rl.utils.logger.ray") def test_start_ray_not_initialized(self, mock_ray): """Test start method when Ray is not initialized.""" # Mock ray.is_initialized to return False @@ -361,8 +361,8 @@ def test_start_ray_not_initialized(self, mock_ray): with pytest.raises(ValueError): monitor.start() - @patch("nemo_reinforcer.utils.logger.ray") - @patch("nemo_reinforcer.utils.logger.threading.Thread") + @patch("nemo_rl.utils.logger.ray") + @patch("nemo_rl.utils.logger.threading.Thread") def test_stop(self, mock_thread, mock_ray): """Test stop method of RayGpuMonitorLogger.""" # Mock ray.is_initialized to return True @@ -391,7 +391,7 @@ def test_stop(self, mock_thread, mock_ray): # Verify monitor state assert monitor.is_running is False - @patch("nemo_reinforcer.utils.logger.ray") + @patch("nemo_rl.utils.logger.ray") def test_parse_gpu_metric(self, mock_ray): """Test _parse_gpu_metric method.""" # Mock ray.is_initialized to return True @@ -468,8 +468,8 @@ def test_parse_gpu_metric(self, mock_ray): # Verify the result is empty assert result == {} - 
@patch("nemo_reinforcer.utils.logger.ray") - @patch("nemo_reinforcer.utils.logger.requests.get") + @patch("nemo_rl.utils.logger.ray") + @patch("nemo_rl.utils.logger.requests.get") def test_fetch_and_parse_metrics(self, mock_get, mock_ray): """Test _fetch_and_parse_metrics method.""" # Mock ray.is_initialized to return True @@ -522,7 +522,7 @@ def test_fetch_and_parse_metrics(self, mock_get, mock_ray): # Verify the result combines both metrics assert result == {"node.2.gpu.0.gpu": 75.5, "node.2.gpu.0.memory": 4096.0} - @patch("nemo_reinforcer.utils.logger.ray") + @patch("nemo_rl.utils.logger.ray") def test_collect_metrics(self, mock_ray): """Test _collect_metrics method.""" # Mock ray.is_initialized to return True @@ -566,7 +566,7 @@ def test_collect_metrics(self, mock_ray): "node.1.gpu.0.memory": 2048.0, } - @patch("nemo_reinforcer.utils.logger.ray") + @patch("nemo_rl.utils.logger.ray") def test_flush_empty_buffer(self, mock_ray, mock_parent_logger): """Test flush method with empty buffer.""" # Mock ray.is_initialized to return True @@ -587,7 +587,7 @@ def test_flush_empty_buffer(self, mock_ray, mock_parent_logger): # Verify parent logger's log_metrics was not called assert len(mock_parent_logger.logged_metrics) == 0 - @patch("nemo_reinforcer.utils.logger.ray") + @patch("nemo_rl.utils.logger.ray") def test_flush(self, mock_ray, mock_parent_logger): """Test flush method with metrics in buffer.""" # Mock ray.is_initialized to return True @@ -645,7 +645,7 @@ def test_flush(self, mock_ray, mock_parent_logger): # Verify buffer was cleared assert monitor.metrics_buffer == [] - @patch("nemo_reinforcer.utils.logger.ray") + @patch("nemo_rl.utils.logger.ray") def test_flush_with_custom_prefix(self, mock_ray, mock_parent_logger): """Test flush method with custom metric prefix.""" # Mock ray.is_initialized to return True @@ -681,8 +681,8 @@ def test_flush_with_custom_prefix(self, mock_ray, mock_parent_logger): assert mock_parent_logger.logged_prefixes[0] == custom_prefix 
assert mock_parent_logger.logged_step_metrics[0] == custom_step_metric - @patch("nemo_reinforcer.utils.logger.ray") - @patch("nemo_reinforcer.utils.logger.time") + @patch("nemo_rl.utils.logger.ray") + @patch("nemo_rl.utils.logger.time") def test_collection_loop(self, mock_time, mock_ray): """Test _collection_loop method (one iteration).""" # Mock ray.is_initialized to return True @@ -739,9 +739,9 @@ def side_effect(): # Verify flush was called (flush_interval elapsed) mock_flush.assert_called_once() - @patch("nemo_reinforcer.utils.logger.WandbLogger") - @patch("nemo_reinforcer.utils.logger.TensorboardLogger") - @patch("nemo_reinforcer.utils.logger.RayGpuMonitorLogger") + @patch("nemo_rl.utils.logger.WandbLogger") + @patch("nemo_rl.utils.logger.TensorboardLogger") + @patch("nemo_rl.utils.logger.RayGpuMonitorLogger") def test_init_with_gpu_monitoring( self, mock_gpu_monitor, mock_tb_logger, mock_wandb_logger, temp_dir ): @@ -784,9 +784,9 @@ def test_init_with_gpu_monitoring( "ray/*", step_metric="ray/ray_step" ) - @patch("nemo_reinforcer.utils.logger.WandbLogger") - @patch("nemo_reinforcer.utils.logger.TensorboardLogger") - @patch("nemo_reinforcer.utils.logger.RayGpuMonitorLogger") + @patch("nemo_rl.utils.logger.WandbLogger") + @patch("nemo_rl.utils.logger.TensorboardLogger") + @patch("nemo_rl.utils.logger.RayGpuMonitorLogger") def test_gpu_monitoring_without_wandb( self, mock_gpu_monitor, mock_tb_logger, mock_wandb_logger, temp_dir ): @@ -833,8 +833,8 @@ def temp_dir(self): yield temp_dir shutil.rmtree(temp_dir) - @patch("nemo_reinforcer.utils.logger.WandbLogger") - @patch("nemo_reinforcer.utils.logger.TensorboardLogger") + @patch("nemo_rl.utils.logger.WandbLogger") + @patch("nemo_rl.utils.logger.TensorboardLogger") def test_init_no_loggers(self, mock_tb_logger, mock_wandb_logger, temp_dir): """Test initialization with no loggers enabled.""" cfg = { @@ -849,8 +849,8 @@ def test_init_no_loggers(self, mock_tb_logger, mock_wandb_logger, temp_dir): 
mock_tb_logger.assert_not_called() mock_wandb_logger.assert_not_called() - @patch("nemo_reinforcer.utils.logger.WandbLogger") - @patch("nemo_reinforcer.utils.logger.TensorboardLogger") + @patch("nemo_rl.utils.logger.WandbLogger") + @patch("nemo_rl.utils.logger.TensorboardLogger") def test_init_wandb_only(self, mock_tb_logger, mock_wandb_logger, temp_dir): """Test initialization with only WandbLogger enabled.""" cfg = { @@ -868,8 +868,8 @@ def test_init_wandb_only(self, mock_tb_logger, mock_wandb_logger, temp_dir): assert wandb_cfg == {"project": "test-project"} mock_tb_logger.assert_not_called() - @patch("nemo_reinforcer.utils.logger.WandbLogger") - @patch("nemo_reinforcer.utils.logger.TensorboardLogger") + @patch("nemo_rl.utils.logger.WandbLogger") + @patch("nemo_rl.utils.logger.TensorboardLogger") def test_init_tensorboard_only(self, mock_tb_logger, mock_wandb_logger, temp_dir): """Test initialization with only TensorboardLogger enabled.""" cfg = { @@ -887,8 +887,8 @@ def test_init_tensorboard_only(self, mock_tb_logger, mock_wandb_logger, temp_dir assert tb_cfg == {"log_dir": "test_logs"} mock_wandb_logger.assert_not_called() - @patch("nemo_reinforcer.utils.logger.WandbLogger") - @patch("nemo_reinforcer.utils.logger.TensorboardLogger") + @patch("nemo_rl.utils.logger.WandbLogger") + @patch("nemo_rl.utils.logger.TensorboardLogger") def test_init_both_loggers(self, mock_tb_logger, mock_wandb_logger, temp_dir): """Test initialization with both loggers enabled.""" cfg = { @@ -910,8 +910,8 @@ def test_init_both_loggers(self, mock_tb_logger, mock_wandb_logger, temp_dir): tb_cfg = mock_tb_logger.call_args[0][0] assert tb_cfg == {"log_dir": "test_logs"} - @patch("nemo_reinforcer.utils.logger.WandbLogger") - @patch("nemo_reinforcer.utils.logger.TensorboardLogger") + @patch("nemo_rl.utils.logger.WandbLogger") + @patch("nemo_rl.utils.logger.TensorboardLogger") def test_log_metrics(self, mock_tb_logger, mock_wandb_logger, temp_dir): """Test logging metrics to all enabled 
loggers.""" cfg = { @@ -936,8 +936,8 @@ def test_log_metrics(self, mock_tb_logger, mock_wandb_logger, temp_dir): mock_wandb_instance.log_metrics.assert_called_once_with(metrics, step, "", None) mock_tb_instance.log_metrics.assert_called_once_with(metrics, step, "", None) - @patch("nemo_reinforcer.utils.logger.WandbLogger") - @patch("nemo_reinforcer.utils.logger.TensorboardLogger") + @patch("nemo_rl.utils.logger.WandbLogger") + @patch("nemo_rl.utils.logger.TensorboardLogger") def test_log_hyperparams(self, mock_tb_logger, mock_wandb_logger, temp_dir): """Test logging hyperparameters to all enabled loggers.""" cfg = { @@ -961,9 +961,9 @@ def test_log_hyperparams(self, mock_tb_logger, mock_wandb_logger, temp_dir): mock_wandb_instance.log_hyperparams.assert_called_once_with(params) mock_tb_instance.log_hyperparams.assert_called_once_with(params) - @patch("nemo_reinforcer.utils.logger.WandbLogger") - @patch("nemo_reinforcer.utils.logger.TensorboardLogger") - @patch("nemo_reinforcer.utils.logger.RayGpuMonitorLogger") + @patch("nemo_rl.utils.logger.WandbLogger") + @patch("nemo_rl.utils.logger.TensorboardLogger") + @patch("nemo_rl.utils.logger.RayGpuMonitorLogger") def test_init_with_gpu_monitoring( self, mock_gpu_monitor, mock_tb_logger, mock_wandb_logger, temp_dir ): @@ -1006,8 +1006,8 @@ def test_init_with_gpu_monitoring( "ray/*", step_metric="ray/ray_step" ) - @patch("nemo_reinforcer.utils.logger.WandbLogger") - @patch("nemo_reinforcer.utils.logger.TensorboardLogger") + @patch("nemo_rl.utils.logger.WandbLogger") + @patch("nemo_rl.utils.logger.TensorboardLogger") def test_log_metrics_with_prefix_and_step_metric( self, mock_tb_logger, mock_wandb_logger, temp_dir ): diff --git a/tests/unit/utils/test_native_checkpoint.py b/tests/unit/utils/test_native_checkpoint.py index 979d2786b8..54962b4f38 100755 --- a/tests/unit/utils/test_native_checkpoint.py +++ b/tests/unit/utils/test_native_checkpoint.py @@ -17,12 +17,12 @@ import torch from tempfile import TemporaryDirectory 
-from nemo_reinforcer.algorithms.utils import get_tokenizer -from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict -from nemo_reinforcer.distributed.virtual_cluster import RayVirtualCluster -from nemo_reinforcer.models.policy.hf_policy import HfPolicy +from nemo_rl.algorithms.utils import get_tokenizer +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.virtual_cluster import RayVirtualCluster +from nemo_rl.models.policy.hf_policy import HfPolicy from transformers import AutoTokenizer, AutoModelForCausalLM -from nemo_reinforcer.utils.native_checkpoint import ( +from nemo_rl.utils.native_checkpoint import ( load_checkpoint, save_checkpoint, ModelState, @@ -411,7 +411,7 @@ def test_convert_dcp_to_hf(policy, num_gpus): os.path.join(tmp_dir, "test_hf_and_dcp-hf-offline"), simple_policy_config["model_name"], # TODO: After the following PR gets merged: - # https://github.com/NVIDIA/reinforcer/pull/148/files + # https://github.com/NVIDIA/nemo-rl/pull/148/files # tokenizer should be copied from policy/tokenizer/* instead of relying on the model name # We can expose a arg at the top level --tokenizer_path to plumb that through. 
# This is more stable than relying on the current NeMo-RL get_tokenizer() which can diff --git a/tests/unit/utils/test_pynvml.py b/tests/unit/utils/test_pynvml.py index cf7044654f..f9b667779d 100644 --- a/tests/unit/utils/test_pynvml.py +++ b/tests/unit/utils/test_pynvml.py @@ -14,14 +14,14 @@ import os from unittest.mock import patch -from nemo_reinforcer.utils.nvml import ( +from nemo_rl.utils.nvml import ( nvml_context, device_id_to_physical_device_id, get_device_uuid, ) -@patch("nemo_reinforcer.utils.nvml.pynvml") +@patch("nemo_rl.utils.nvml.pynvml") def test_nvml_context(mock_pynvml): """Test that nvml_context initializes and shuts down NVML.""" with nvml_context(): @@ -42,8 +42,8 @@ def test_device_id_conversion(): assert device_id_to_physical_device_id(1) == 3 -@patch("nemo_reinforcer.utils.nvml.device_id_to_physical_device_id") -@patch("nemo_reinforcer.utils.nvml.pynvml") +@patch("nemo_rl.utils.nvml.device_id_to_physical_device_id") +@patch("nemo_rl.utils.nvml.pynvml") def test_get_device_uuid(mock_pynvml, mock_convert_id): """Test that get_device_uuid correctly retrieves a UUID.""" diff --git a/tests/unit/utils/test_timer.py b/tests/unit/utils/test_timer.py index 28eebb5b42..5ae6ae8687 100644 --- a/tests/unit/utils/test_timer.py +++ b/tests/unit/utils/test_timer.py @@ -17,7 +17,7 @@ import numpy as np from unittest.mock import patch -from nemo_reinforcer.utils.timer import Timer +from nemo_rl.utils.timer import Timer class TestTimer: diff --git a/tests/unit/utils/test_venvs.py b/tests/unit/utils/test_venvs.py index aac294ff0d..15b229b9a7 100644 --- a/tests/unit/utils/test_venvs.py +++ b/tests/unit/utils/test_venvs.py @@ -14,14 +14,14 @@ import os from unittest.mock import patch from tempfile import TemporaryDirectory -from nemo_reinforcer.utils.venvs import create_local_venv +from nemo_rl.utils.venvs import create_local_venv import subprocess def test_create_local_venv(): with TemporaryDirectory() as tempdir: - # Mock os.environ to set REINFORCER_VENV_DIR 
for this test - with patch.dict(os.environ, {"REINFORCER_VENV_DIR": tempdir}): + # Mock os.environ to set NEMO_RL_VENV_DIR for this test + with patch.dict(os.environ, {"NEMO_RL_VENV_DIR": tempdir}): venv_python = create_local_venv( py_executable="uv run --group docs", venv_name="test_venv" ) diff --git a/tools/autoformat.sh b/tools/autoformat.sh index c18d31e4b0..a7fb5931ce 100644 --- a/tools/autoformat.sh +++ b/tools/autoformat.sh @@ -24,7 +24,7 @@ if [[ $GIT_MAJOR -eq 2 && $GIT_MINOR -lt 31 ]]; then fi SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) -PACKAGE_ROOT=$(realpath $SCRIPT_DIR/../nemo_reinforcer) +PACKAGE_ROOT=$(realpath $SCRIPT_DIR/../nemo_rl) ruff check $PACKAGE_ROOT --fix ruff format $PACKAGE_ROOT \ No newline at end of file diff --git a/tools/copyright.sh b/tools/copyright.sh index bebab2d45f..927d148af2 100755 --- a/tools/copyright.sh +++ b/tools/copyright.sh @@ -19,7 +19,7 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) # Move to the project root cd $SCRIPT_DIR/.. find_files_with_missing_copyright() { -find ./nemo_reinforcer/ ./docs/*.py ./examples/ ./tests/ ./tools/ -type f -name '*.py' | while read path; do +find ./nemo_rl/ ./docs/*.py ./examples/ ./tests/ ./tools/ -type f -name '*.py' | while read path; do echo -en $path"\t" head -2 $path | grep -iv 'coding=' | head -1 done \ diff --git a/uv.lock b/uv.lock index 9902633207..b01626eaed 100644 --- a/uv.lock +++ b/uv.lock @@ -1807,7 +1807,7 @@ wheels = [ ] [[package]] -name = "nemo-reinforcer" +name = "nemo-rl" source = { editable = "." } dependencies = [ { name = "accelerate" },