huggingface · sergiopaniego · Feb 4, 2026 · Dec 17, 2025 · Dec 18, 2025 · Dec 20, 2025
diff --git a/examples/scripts/nemo_gym/README.md b/examples/scripts/nemo_gym/README.md
@@ -0,0 +1,33 @@
+# Post-training with NeMo Gym and TRL
+
+This integration supports training language models in NeMo Gym environments using TRL's GRPO. Both single-step and multi-step tasks are supported, including multi-environment training. NeMo Gym orchestrates the rollouts and returns token IDs and logprobs to TRL through the rollout function for training. Currently, this integration is only supported through TRL's vLLM server mode.
+
+## Interactive single node 
+
+1. Launch vLLM server:
+```bash
+CUDA_VISIBLE_DEVICES=0,1,2,3 trl vllm-serve \
+  --model Qwen/Qwen3-4B-Instruct-2507 \
+  --tensor-parallel-size 4 \
+  --max-model-len 8192 \
+  --trust-remote-code
+```
+
+2. Start NeMo Gym servers:
+```bash
+ng_run "+config_paths=[resources_servers/workplace_assistant/configs/workplace_assistant.yaml,responses_api_models/vllm_model/configs/vllm_model.yaml]"
+```
+
+
+3. Run training:
+```bash
+CUDA_VISIBLE_DEVICES=4 python train.py --config config.yaml
+```
+
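+## Multi-node with Slurm
+
+See `submit.sh` for a multi-node example: it requests five nodes, runs training on four of them, and uses the fifth for the NeMo Gym servers and the TRL vLLM server.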
+
+## Multi environment training
+
+Docs coming soon! 
diff --git a/examples/scripts/nemo_gym/config.yaml b/examples/scripts/nemo_gym/config.yaml
@@ -0,0 +1,32 @@
+# Example configuration for the NeMo Gym + TRL training script. Both the README
+# and submit.sh pass this file as: train.py --config config.yaml
+# NOTE(review): train.py is not shown alongside this file, so how each key is
+# consumed should be confirmed against that script.
+
+# Same model ID that the README and submit.sh serve with the TRL vLLM server.
+model_name: "Qwen/Qwen3-4B-Instruct-2507"
+
+# Relative dataset paths (presumably resolved from the working directory of
+# train.py -- TODO confirm).
+dataset_path: "data/train.jsonl"
+eval_dataset_path: "data/val.jsonl"
+
+# Output location and experiment-tracking settings.
+output_dir: "outputs/nemo_gym"
+run_name_prefix: "nemo_gym"
+report_to: "wandb"
+project_name: "trl-nemo-gym"
+log_completions: true
+num_completions_to_print: 2
+
+# Optimisation settings.
+learning_rate: 1.0e-5
+max_steps: 1000
+num_generations: 8
+per_device_train_batch_size: 1
+gradient_accumulation_steps: 8
+# NOTE(review): submit.sh starts vLLM with --max-model-len 16384, while the
+# README single-node example uses --max-model-len 8192 -- confirm whether the
+# server limit has to match this value.
+max_seq_length: 16384
+warmup_steps: 5
+lr_scheduler_type: "linear"
+optim: "adamw_torch_fused"
+weight_decay: 0.0
+vllm_importance_sampling_correction: true
+
+# Sampling settings.
+temperature: 1.0
+top_p: 0.999
+
+# Checkpoint cadence, in steps.
+save_steps: 10
+
+# Evaluation cadence, in steps.
+eval_strategy: "steps"
+eval_steps: 10
+
diff --git a/examples/scripts/nemo_gym/submit.sh b/examples/scripts/nemo_gym/submit.sh
@@ -0,0 +1,109 @@
+#!/bin/bash
+#SBATCH -A account
+#SBATCH -p partition
+#SBATCH -N 5
+#SBATCH --gres gpu:8
+#SBATCH --ntasks-per-node=1
+#SBATCH --cpus-per-task=16
+#SBATCH --time=4:00:00
+#SBATCH --job-name=trl_nemo_gym
+#SBATCH --output=logs/%j/slurm.out
+#SBATCH --error=logs/%j/slurm.err
+
+CONTAINER_IMAGE="nvcr.io/nvidia/pytorch:25.12-py3"
+MOUNTS="/path/to/mounts:/path/to/mounts"
+
+NODELIST=($(scontrol show hostnames $SLURM_JOB_NODELIST))
+
+TRAIN_NODE_0="${NODELIST[0]}"
+TRAIN_NODE_1="${NODELIST[1]}"
+TRAIN_NODE_2="${NODELIST[2]}"
+TRAIN_NODE_3="${NODELIST[3]}"
+VLLM_NODE="${NODELIST[4]}"
+
+echo "Training Nodes: $TRAIN_NODE_0, $TRAIN_NODE_1, $TRAIN_NODE_2, $TRAIN_NODE_3"
+echo "vLLM Node: $VLLM_NODE"
+echo "Main process IP: $TRAIN_NODE_0"
+
+LOG_DIR="logs/${SLURM_JOB_ID}"
+mkdir -p ${LOG_DIR}
+
+echo "Starting ng_run and vLLM on ${VLLM_NODE}..."
+echo "Logs will be saved to: ${LOG_DIR}"
+
+srun --nodes=1 --ntasks=1 --nodelist="${VLLM_NODE}" \
+    --container-image="${CONTAINER_IMAGE}" \
+    --container-mounts="${MOUNTS}" \
+    --container-mount-home \
+    bash -c "
+    LOG_DIR=/path/to/logs
+    mkdir -p \${LOG_DIR}
+
+    # Install uv if not already installed
+    curl -LsSf https://astral.sh/uv/install.sh | sh
+    source \$HOME/.local/bin/env
+
+    # Start nemo gym servers
+    (set -x && \
+    export HOME=/path/to/user && \
+    export PATH=\$HOME/.local/bin:\$PATH && \
+    cd /path/to/user/Gym && \
+    uv venv --python 3.12 && \
+    source .venv/bin/activate && \
+    uv sync && \
+    ray stop --force && \
+    ng_run +config_paths=[responses_api_models/vllm_model/configs/vllm_model.yaml,resources_servers/workplace_assistant/configs/workplace_assistant.yaml] +head_server.host=0.0.0.0) > \${LOG_DIR}/ng_run.log 2>&1 &
+
+    sleep 10
+
+    # Start trl vllm server
+    (set -x && \
+    export HOME=/path/to/user && \
+    export HF_HOME=/path/to/user/hf_home && \
+    cd /path/to/user/trl && \
+    source .venv/bin/activate && \
+    python -m trl.scripts.vllm_serve \
+    --model Qwen/Qwen3-4B-Instruct-2507 \
+    --host 0.0.0.0 \
+    --tensor-parallel-size 8 \
+    --data-parallel-size 1 \
+    --max-model-len 16384 \
+    --gpu-memory-utilization 0.7 \
+    --port 8000) > \${LOG_DIR}/vllm_serve.log 2>&1 &
+
+    wait
+" &
+
+echo "Waiting for nemo gym and vllm to start..."
+sleep 120
+
+echo "Launching training on 4 nodes..."
+
+TRAIN_NODES_LIST="${TRAIN_NODE_0},${TRAIN_NODE_1},${TRAIN_NODE_2},${TRAIN_NODE_3}"
+
+srun --nodes=4 --ntasks=4 --nodelist="${TRAIN_NODES_LIST}" \
+    --container-image="${CONTAINER_IMAGE}" \
+    --container-mounts="${MOUNTS}" \
+    --container-mount-home \
+    bash -c "
+    set -x && \
+    export HOME=/path/to/user && \
+    export HF_HOME=/path/to/user/hf_home && \
+    cd /path/to/user/trl && \
+    source .venv/bin/activate && \
+    cd examples/scripts/nemo_gym && \
+    accelerate launch \
+    --config_file deepspeed_zero3.yaml \
+    --num_processes 32 \
+    --num_machines 4 \
+    --machine_rank \$SLURM_PROCID \
+    --main_process_ip ${TRAIN_NODE_0} \
+    --main_process_port 29500 \
+    --rdzv_backend c10d \
+    train.py \
+    --config config.yaml \
+    --vllm_server_host ${VLLM_NODE} \
+    --head_server_host ${VLLM_NODE}" &
+
+wait
+