verl-project · chenhaiq · Nov 25, 2025 · Sep 23, 2025 · Sep 23, 2025 · Sep 23, 2025
diff --git a/.gitignore b/.gitignore
@@ -21,7 +21,7 @@ tensorflow/my_graph/*
 
 # Distribution / packaging
 .Python
-env/
+# env/
 build/
 develop-eggs/
 dist/

diff --git a/docker/Dockerfile.isaaclab230 b/docker/Dockerfile.isaaclab230
@@ -0,0 +1,150 @@
+
+# Alternative base image reference, kept commented out:
+#FROM nvcr.nju.edu.cn/nvidia/isaac-lab:2.3.0
+# NOTE(review): this name has no registry prefix; presumably `isaac-lab-base:latest` must be
+# built or tagged locally before building this file -- confirm.
+FROM isaac-lab-base:latest
+
+# NOTE(review): presumably pre-accepts the NVIDIA EULA so Isaac Sim starts without a prompt -- confirm.
+ENV ACCEPT_EULA=Y
+# Clear any ENTRYPOINT inherited from the base image.
+ENTRYPOINT []
+
+# desktop: switch apt to the ivolces Ubuntu mirror, generate the en_US.UTF-8 locale, then
+# install download tools, the XFCE desktop, Xorg, TigerVNC and fonts.
+# DEBIAN_FRONTEND=noninteractive is set on every install so that a package with a debconf
+# prompt cannot block the non-interactive build. The apt package lists are removed in the
+# same layer so they do not end up in the image.
+RUN --mount=type=cache,target=/var/cache/apt \
+    sed -i 's/archive.ubuntu.com/mirrors.ivolces.com/g' /etc/apt/sources.list && \
+    sed -i 's/security.ubuntu.com/mirrors.ivolces.com/g' /etc/apt/sources.list && \
+    apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y locales && \
+    locale-gen en_US.UTF-8 && \
+    update-locale LANG=en_US.UTF-8 LC_CTYPE=en_US.UTF-8 && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y wget curl \
+        xfce4 \
+        xfce4-goodies \
+        xorg \
+        dbus-x11 \
+        x11-xserver-utils \
+        tigervnc-standalone-server \
+        tigervnc-common \
+        tigervnc-tools \
+        fonts-dejavu \
+        fonts-liberation && \
+    rm -rf /var/lib/apt/lists/*
+# cuda 12.2: download NVIDIA's repository signing key, register the CUDA apt repository
+# (ubuntu2204/x86_64), then install cuSPARSELt, NCCL pinned to 2.27.3-1+cuda12.2, and the
+# GLFW / Mesa GL / OSMesa libraries. The downloaded key file is deleted at the end.
+# NOTE(review): apt-key is deprecated in current apt releases; consider a signed-by keyring.
+RUN --mount=type=cache,target=/var/cache/apt \
+    cd /tmp && \
+    wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub && \
+    apt-key add 3bf863cc.pub && \
+    echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \
+    apt-get update && \
+    apt-get install -y libcusparselt0 libnccl2=2.27.3-1+cuda12.2 libglfw3 libgl1-mesa-glx libosmesa6 && \
+    rm -f 3bf863cc.pub
+
+# libero: version-pinned Python dependencies, installed through Isaac Sim's python.sh launcher
+RUN --mount=type=cache,target=/root/.cache/pip \
+    /workspace/isaaclab/_isaac_sim/python.sh -m pip install \
+        easydict==1.9 \
+        robosuite==1.4.0 \
+        bddl==1.0.1 \
+        future==0.18.2 \
+        cloudpickle==2.1.0
+
+# transformers with the hf_xet extra. The requirement is quoted because `[...]` is a shell
+# glob pattern and could otherwise be expanded against files in the working directory.
+RUN --mount=type=cache,target=/root/.cache/pip \
+    /workspace/isaaclab/_isaac_sim/python.sh -m pip install 'transformers[hf_xet]'
+
+# Training-side Python dependencies (numpy pinned, the rest upgraded to the latest release).
+# Requirement specifiers are quoted: an unquoted `>=` is parsed by the shell as an output
+# redirection (pip would receive a bare `pyarrow` and a file named `=19.0.0` would be
+# created), and `[default]` is a glob pattern.
+RUN --mount=type=cache,target=/root/.cache/pip \
+    /workspace/isaaclab/_isaac_sim/python.sh -m pip install --upgrade numpy==1.26.4 'ray[default]' \
+    accelerate codetiming datasets dill hydra-core pandas peft 'pyarrow>=19.0.0' pybind11 pylatexenc
+
+# openvla-oft: additional Python dependencies.
+# `packaging>=20.0` is quoted because an unquoted `>=` is a shell output redirection: pip
+# would receive a bare `packaging` and its output would be written to a file named `=20.0`.
+RUN --mount=type=cache,target=/root/.cache/pip \
+    /workspace/isaaclab/_isaac_sim/python.sh -m pip install pre-commit torchdata 'packaging>=20.0' uvicorn fastapi latex2sympy2_extended math_verify tensorboard
+
+
+# flash_attn: download the prebuilt v2.8.0.post2 wheel into /tmp, install it, then delete it.
+# The wheel file name is kept in one shell variable so the three commands cannot drift apart.
+RUN cd /tmp && \
+    FLASH_ATTN_WHL=flash_attn-2.8.0.post2+cu12torch2.7cxx11abiFALSE-cp311-cp311-linux_x86_64.whl && \
+    wget -nv "https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.0.post2/${FLASH_ATTN_WHL}" && \
+    /workspace/isaaclab/_isaac_sim/python.sh -m pip install "/tmp/${FLASH_ATTN_WHL}" && \
+    rm -f "/tmp/${FLASH_ATTN_WHL}"
+
+# Set protobuf and timm to exact versions; --upgrade lets pip replace whatever is installed.
+RUN --mount=type=cache,target=/root/.cache/pip \
+    /workspace/isaaclab/_isaac_sim/python.sh -m pip install --upgrade protobuf==3.20.3 timm==0.9.16
+
+# Install these three packages at exact versions; --no-deps keeps pip from installing or
+# changing their dependencies.
+# NOTE(review): `--force` is presumably accepted as an abbreviation of --force-reinstall -- confirm.
+RUN --mount=type=cache,target=/root/.cache/pip \
+    /workspace/isaaclab/_isaac_sim/python.sh -m pip install orjson==3.11.3 pyvers==0.1.0 tensordict==0.10.0 --force --no-deps
+
+
+# Write the VNC session startup script, which loads ~/.Xresources when readable and starts XFCE.
+# The heredoc delimiter is quoted ('EOP'), so the body is written verbatim with no expansion
+# at build time. $HOME must therefore NOT be backslash-escaped: an escaped `\$HOME` would be
+# stored as-is and make the script test the literal path "$HOME/.Xresources".
+RUN mkdir -p /root/.vnc && \
+    cat <<'EOP' > /root/.vnc/xstartup
+#!/bin/sh
+unset SESSION_MANAGER
+unset DBUS_SESSION_BUS_ADDRESS
+[ -r "$HOME/.Xresources" ] && xrdb "$HOME/.Xresources"
+xsetroot -solid grey
+exec startxfce4
+EOP
+
+# Write the VNC config file for root: 1920x1080 at 24-bit depth, 96 dpi, desktop name
+# "Isaac-Sim-Desktop", with `localhost=no`.
+# NOTE(review): `localhost=no` presumably allows connections from other hosts -- confirm.
+RUN cat <<'EOP' > /root/.vnc/config
+geometry=1920x1080
+depth=24
+desktop=Isaac-Sim-Desktop
+dpi=96
+localhost=no
+EOP
+
+# Write /root/start_isaac_vnc.sh, a helper that:
+#   1. exports DISPLAY=:1,
+#   2. starts `vncserver :1` (then sleeps 3 s) if no process matching "Xvnc.*:1" is found,
+#   3. launches Isaac Sim with --allow-root.
+# The comments inside the heredoc are part of the generated file and are left untouched; in
+# English they read "set the display variable", "check whether VNC is running" and
+# "start Isaac Sim".
+RUN cat <<'EOP' > /root/start_isaac_vnc.sh
+#!/bin/bash
+# 设置显示变量
+export DISPLAY=:1
+
+# 检查VNC是否运行
+if ! pgrep -f "Xvnc.*:1" > /dev/null; then
+    echo "Starting VNC server..."
+    vncserver :1 -localhost no -geometry 1920x1080 -depth 24 -desktop "Isaac-Sim-Desktop"
+    sleep 3
+fi
+
+# 启动Isaac Sim
+echo "Starting Isaac Sim..."
+/workspace/isaaclab/_isaac_sim/isaac-sim.sh --allow-root
+EOP
+
+# Mark both generated scripts as executable.
+RUN chmod +x \
+        /root/.vnc/xstartup \
+        /root/start_isaac_vnc.sh
+
+# Run Isaac Sim once at build time with --ext-precache-mode.
+# NOTE(review): presumably this caches extensions into the image so the first container start is faster -- confirm.
+RUN /workspace/isaaclab/_isaac_sim/isaac-sim.sh --allow-root --ext-precache-mode
+
+# Clone LIBERO into /root/LIBERO and patch its setup.py so that `packages` is the fixed list
+# ["libero"] instead of the find_packages() filter (the original line is kept as a comment).
+# NOTE(review): the clone is not pinned to a tag or commit, so the patch may stop applying
+# if upstream setup.py changes -- consider pinning.
+RUN cd /root && \
+    git clone https://github.com/Lifelong-Robot-Learning/LIBERO.git && \
+    cd LIBERO && \
+    git apply <<'EOP'
+diff --git a/setup.py b/setup.py
+index 59d4900..dbe9811 100644
+--- a/setup.py
++++ b/setup.py
+@@ -13,7 +13,8 @@ long_description = "".join(lines)
+
+ setup(
+     name="libero",
+-    packages=[package for package in find_packages() if package.startswith("libero")],
++    #packages=[package for package in find_packages() if package.startswith("libero")],
++    packages=["libero"],
+     install_requires=[],
+     eager_resources=["*"],
+     include_package_data=True,
+EOP
+
+# Install the patched LIBERO checkout in editable mode (-e) through Isaac Sim's python.sh.
+RUN cd /root/LIBERO && \
+    /workspace/isaaclab/_isaac_sim/python.sh -m pip install -e .
+
+# libero config: write /root/.libero/config.yaml, pointing the assets, bddl_files,
+# benchmark_root, datasets and init_states entries at paths inside the /root/LIBERO checkout.
+# NOTE(review): presumably pre-creating this file keeps LIBERO from asking for these paths
+# on first import -- confirm.
+RUN mkdir -p /root/.libero && \
+cat <<'EOP' > /root/.libero/config.yaml
+assets: /root/LIBERO/libero/libero/./assets
+bddl_files: /root/LIBERO/libero/libero/./bddl_files
+benchmark_root: /root/LIBERO/libero/libero
+datasets: /root/LIBERO/libero/libero/../datasets
+init_states: /root/LIBERO/libero/libero/./init_files
+EOP
+
+# from https://github.com/nvidia-china-sae/RobotLearningLab
+# The RobotLearningLab/ directory must be present in the build context.
+COPY RobotLearningLab/ /root/RobotLearningLab/
+
+# Replace Isaac Lab's `source` directory with a symlink to the RobotLearningLab copy, then
+# install ./source/isaaclab from it in editable mode.
+RUN cd /workspace/isaaclab/ && \
+    rm -rf source && \
+    ln -s /root/RobotLearningLab/source source && \
+    /workspace/isaaclab/_isaac_sim/python.sh -m pip install -e ./source/isaaclab
+# Ray cmd: install colorama and append a `ray` alias to /root/.bashrc that runs Ray's
+# scripts.py through Isaac Sim's python.sh.
+# NOTE(review): an alias in .bashrc is presumably only available in interactive bash
+# sessions, not in scripts or `docker exec <cmd>` -- confirm this is sufficient.
+RUN /workspace/isaaclab/_isaac_sim/python.sh -m pip install colorama && \
+cat <<'EOP' >> /root/.bashrc
+alias ray='/workspace/isaaclab/_isaac_sim/python.sh /workspace/isaaclab/_isaac_sim/kit/python/lib/python3.11/site-packages/ray/scripts/scripts.py'
+EOP
@@ -153,12 +153,12 @@ https://excalidraw.com/#json=pfhkRmiLm1jnnRli9VFhb,Ut4E8peALlgAUpr7E5pPCA
 How to generate ray timeline to analyse performance of a training job?
 ------------------------------------------------------------------------------------------
 
-To generate the ray timeline file, you can set the config term ``ray_init.timeline_file`` to a json file path.
+To generate the ray timeline file, you can set the config term ``ray_init.timeline_json_file`` to a json file path.
 For example:
 
 .. code:: bash
 
-    ray_init.timeline_file=/tmp/ray_timeline.json
+    ray_init.timeline_json_file=/tmp/ray_timeline.json
 
 The file will be generated in the specified path at the end of a training job.
 You can use tools like chrome://tracing or the Perfetto UI and view the ray timeline file.

diff --git a/recipe/vla/README.md b/recipe/vla/README.md
@@ -0,0 +1,67 @@
+# [WIP] Experimental VLA RL Support
+
+This recipe introduces experimental support for training SimpleVLA-OFT, a VLA model.
+
+A key challenge in VLA RL training, which differs from standard LLM RL training, is that the environment/simulation phase has a higher computational overhead than the generation phase. To achieve high efficiency, RL in this context requires an effective environment scheduling mechanism in addition to verl's existing efficient training and inference scheduling. The goal is to reduce the inefficiency caused by the environment and the model's generation process waiting on each other.
+
+The core computational model of this PR is inspired by the pipeline parallelism design from RLinf. It aims to overlap the environment's execution time with the model's generation time, thereby maximizing environment utilization.
+
+This PR also proposes a future direction: creating a unified `Env` class. This class would encapsulate functionalities like tool calling, MCP, etc., under a single interface. The environment would manage its state internally, allowing the agent to communicate simply by calling `step(action)` to submit an action and receive an observation.
+
+Currently, this code lives on its own within the `recipe` folder. Much of the design is tightly coupled with the SimpleVLA model and the Libero environment, serving as an initial version for demonstration and discussion.
+
+## Supported Simulators
+
+| Simulator | Env Name |  Difference | Benchmark data source |
+| --- | --- | --- | --- | 
+| Mujoco | LiberoEnv | 1. init task from init_states in Libero dataset<br>2. each env can have different tasks | https://github.com/Lifelong-Robot-Learning/LIBERO |
+| IsaacSim | IsaacEnv  | 1. init task from random states, which has more variety than init_states in dataset<br>2. each sim process must use the same task for all of its envs | https://huggingface.co/datasets/china-sae-robotics/IsaacLabPlayGround_Dataset |
+
+## Hardware Requirements
+
+*   Simulator GPU: NVIDIA L20 or L40 with 48GB memory and RT Cores
+
+Notes: 
+1. Mujoco can fall back to CPU mode with degraded performance if no RT Cores are available.
+2. IsaacSim only supports GPUs with RT Cores.
+3. RTX GPUs will be supported in a future release with the remote deployment feature, but they cannot work in colocated mode because of limited GPU memory capacity.
+
+## Docker image
+
+The Isaac Lab support for the Libero dataset depends on the RobotLearningLab project from the Isaac Lab Project Developers team. The project is in the process of being made publicly available and is currently built into this image under the BSD-3-Clause license.
+
+`recipe/vla/run_simpleVLA_libero_grpo.sh` is the example of training SimpleVLA-OFT with this image:
+
+`vemlp-cn-shanghai.cr.volces.com/preset-images/verl_vla:preview_vla_0.1`
+
+## Disaggregation Mode for Train-Rollout / Simulation
+
+Disaggregate Train-Rollout workers and Simulation workers into different nodes.
+
+To enable disaggregation mode for Train-Rollout nodes and Simulation nodes, we need to establish ray connection before running verl.
+* On Train-Rollout node (default main node):
+```shell
+ray start --head --dashboard-host=0.0.0.0 --resources='{"train_rollout": 1}'
+```
+* On Simulation node:
+```shell
+ray start --address='<main_node_ip>:6379' --resources='{"sim": 1}'
+```
+
+Then run verl on main node **only**. See `run_simpleVLA_isaac_disagg.sh` for example.
+- `env.disagg_sim.enable=True` enable disagg mode
+- `trainer.n_env_gpus_per_node` GPUs for simulation per node
+- `trainer.n_rollout_gpus_per_node` GPUs for train-rollout node
+- `env.disagg_sim.nnodes` sim node num
+- `trainer.nnodes` train-rollout node num
+
+*Tips: you can run the following command on the sim node to check whether sim workers are scheduled up*
+```shell
+python -c "import ray; ray.init(address=\"<main_node_ip>:6379\"); print(ray._private.state.available_resources_per_node())"
+```
+*If you see output pattern like "'train_rollout': 0.9992" and "'sim': 0.9992", the sim workers are scheduled up successfully*
+*The actual value depends on your GPUs per node, usually <1 - 1e-4 * num_gpus>*
+
+**References:**
+*   [https://github.com/PRIME-RL/SimpleVLA-RL](https://github.com/PRIME-RL/SimpleVLA-RL)
+*   [https://github.com/RLinf/RLinf](https://github.com/RLinf/RLinf)
diff --git a/recipe/vla/config/rob_ppo_trainer.yaml b/recipe/vla/config/rob_ppo_trainer.yaml
@@ -0,0 +1,57 @@
+# the rob_ppo config will override default ppo_trainer.yaml
+
+# Add verl/trainer/config to Hydra's search path so the `ppo_trainer` default below resolves.
+hydra:
+  searchpath:
+    - file://verl/trainer/config
+
+# `_self_` is listed after `ppo_trainer`, so values in this file take precedence.
+defaults:
+  - ppo_trainer
+  - _self_
+
+env:
+  rollout:
+    # NOTE(review): presumably the number of pipeline stages used to overlap env execution
+    # with model generation -- confirm.
+    pipeline_stage_num: 2
+  actor:
+    model:
+      num_action_chunks: 8
+      action_dim: 7
+  train:
+    # NOTE(review): presumably selects the simulator backend (Libero vs. Isaac) -- confirm accepted values.
+    simulator_type: libero
+    max_episode_steps: 512
+    reward_coef: 1.0
+    only_eval: False
+    video_cfg:
+      save_video: True
+      video_base_dir: /tmp/videos
+    num_envs: 16
+    seed: 42
+    task_suite_name: libero_10
+    init_params:
+      camera_depths: False
+      camera_heights: 256
+      camera_widths: 256
+      camera_names: 
+        - agentview
+        - robot0_eye_in_hand
+  # Disaggregation mode: Train-Rollout workers and Simulation workers on different nodes.
+  # `enable` turns the mode on; `nnodes` is the number of sim nodes.
+  disagg_sim:
+    enable: False
+    nnodes: 1
+
+
+actor_rollout_ref:
+  actor:
+    num_images_in_input: 1
+    traj_mini_batch_size: 16
+    fsdp_config:
+      wrap_policy:
+        transformer_layer_cls_to_wrap: 
+          - PrismaticProjector
+          - LlamaDecoderLayer
+        min_num_params: 0
+      param_offload: False
+      optimizer_offload: False
+      forward_prefetch: True
+      fsdp_size: -1
+  rollout:
+    mode: async_envloop
+    prompt_length: 512
-Original file line number
+Diff line change
@@ Expand Up / @@ -21,7 +21,7 @@ tensorflow/my_graph/* @@
     # Distribution / packaging
     .Python
-    env/
+    # env/
     build/
     develop-eggs/
     dist/
@@ Expand Down @@