Merged
42 commits
2c179da
Add explicit position_ids to model.generate in hf rollout (#1637)
Geaming2002 May 23, 2025
54a5e6e
[megatron] feat: save hf model config in megatron checkpoint manager …
0x404 May 23, 2025
aaaaaab
Activation Offloading (#1220)
imh966 May 23, 2025
9ddc725
fix: add `loss_agg_mode` to critics (#1340)
tongyx361 May 23, 2025
cdee00d
fix: only load reference policy when needed in DAPO (#1651)
tongyx361 May 23, 2025
c4faf5c
[CI] feat: add ignore for CI of SPIN & SPPO (#1653)
tongyx361 May 23, 2025
a7b2e29
fix: entropy in DAPO (#1652)
tongyx361 May 23, 2025
0528ba1
[NPU] feat: Support FSDP worker and vLLM Ascend (#332)
sunyi0505 May 23, 2025
96c181a
chore(ci): support FSDP2 for multi-turn SGLangRollout with tool calli…
zyzshishui May 23, 2025
7225544
[SGLang Async Rollout] Validate prompt_len + max_resp_len <= max_mode…
jybsuper May 24, 2025
0286210
[Megatron] Support optimizer offload for moe when ep > 1 (#1638)
zzong2006 May 24, 2025
4779f26
[Refactor] fused kernel in forward (#1624)
mingruimingrui May 24, 2025
5dc6439
[CI] fix: DAPO CI & response_mask (#1666)
tongyx361 May 24, 2025
3c048ac
modify the instructions for using verl on ASCEND NPU (#1670)
sunyi0505 May 24, 2025
69582dc
Add verl-agent and GiGPO to the awesome work list (#1660)
langfengQ May 24, 2025
cf731e8
[sglang] Fix megatron support in sglang and add sglang_async support …
SwordFaith May 24, 2025
7d26d73
modify the installation method of vllm on different architectures and…
sunyi0505 May 24, 2025
4532308
[misc] fix: fix megatron entropy (#1672)
vermouth1992 May 24, 2025
c60546d
[misc] fix: fix device (#1671)
vermouth1992 May 24, 2025
3d5f15f
[fix] use correct variable for saving hf model (#1681)
BaiqingL May 25, 2025
54c9b73
update ascend_quick_start doc (#1685)
zheliuyu May 26, 2025
8298f7d
[Bugfix] Fix for non_fused_kernels passing arguments (#1687)
ETOgaosion May 26, 2025
5fe1839
[CI] fix some tests scope (#1689)
ETOgaosion May 26, 2025
4583e4c
[Doc] Add a visual explanation of the configuration to the documentat…
hiyouga May 26, 2025
9846360
fix TimeoutError in aiohttp (#1702)
casper-hansen May 27, 2025
54b2677
Add dstack example (#2) (#1706)
Bihan May 27, 2025
4d3ca21
[CI] disable e2e_prime, always hang for 50 minutes (#1728)
ETOgaosion May 27, 2025
34e409b
[docs] refactor: Adding doc strings and doc pages for public methods …
hongpeng-guo May 27, 2025
16a13d8
[misc] feat: support logging rollout prob vs. actor probs for debuggi…
vermouth1992 May 28, 2025
d5570c4
[mics][fix] Deprecate legacy `_default_compute_score` API and fix ray…
hongpeng-guo May 28, 2025
9b186ed
Update README.md (#1731)
czx6858 May 28, 2025
99e749a
Fix Configuration for Micro Batch Size in Megatron's Ref Policy (#1700)
none0663 May 28, 2025
432f9e9
[feat][BREAKING] Megatron support dynamic batch size, to rebalance th…
ETOgaosion May 28, 2025
c751404
add linear_cross_entropy
Jianbing-D May 15, 2025
11f70a7
make patch feasible
ETOgaosion May 28, 2025
018ee14
integrate fsdp kernel
ETOgaosion May 28, 2025
3ba8e2c
fix tests
ETOgaosion May 28, 2025
86cce75
fix tests
ETOgaosion May 28, 2025
cd38553
fix shapes
ETOgaosion May 28, 2025
d8170d3
seems no problem with APIs, but precisions not match
ETOgaosion May 28, 2025
01437fa
pass tests
ETOgaosion May 29, 2025
1acd108
fix reward model config
ETOgaosion May 29, 2025
15 changes: 11 additions & 4 deletions .github/workflows/checkpoint_converter.yml
@@ -14,15 +14,22 @@ on:
- v0.*
paths:
- "**/*.py"
# Entrypoints
- ".github/workflows/checkpoint_converter.yml"
- "!examples"
# Other entrypoints
- "!examples/**"
- "!tests/**"
- "!verl/trainer/main_*.py"
- "!verl/trainer/fsdp_sft_trainer.py"
# Recipes
- "!recipe"
- "!recipe/**"
# FSDP
- "!verl/workers/**/*dp_*.py"
# Entrypoints
- ".github/workflows/checkpoint_converter.yml"
- ".github/workflows/e2e_ppo_trainer_megatron.yml"
- "examples/data_preprocess/gsm8k.py"
- "tests/e2e/run_ppo_trainer_megatron.sh"
- "verl/trainer/main_ppo.py"
- "verl/trainer/config/ppo_megatron_trainer.yaml"
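The hunk above relies on GitHub Actions evaluating `paths` patterns in order, with later patterns overriding earlier matches: a broad include first, `!`-negated excludes next, then specific entrypoints re-included at the end. A minimal sketch of that ordering rule (illustration only, not part of this PR):

```yaml
on:
  push:
    paths:
      - "**/*.py"                                 # 1. include every Python file
      - "!tests/**"                               # 2. then exclude the test tree
      - "tests/e2e/run_ppo_trainer_megatron.sh"   # 3. re-include one entrypoint
```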


# Cancel jobs on the same ref if a new one is triggered
@@ -5,12 +5,10 @@ on:
# but only for the main branch
push:
branches:
- main
- v0.*
- disabled_ci
pull_request:
branches:
- main
- v0.*
- disabled_ci
paths:
- "**/*.py"
# Other entrypoints
51 changes: 47 additions & 4 deletions .github/workflows/e2e_ascend.yml
@@ -26,9 +26,9 @@ jobs:
test:
name: verl Ascend test (self-host)
runs-on: [self-hosted, npu-0]
timeout-minutes: 5 # Increase this timeout value as needed
timeout-minutes: 30 # Increase this timeout value as needed
container:
image: quay.io/ascend/cann:8.0.0-910b-ubuntu22.04-py3.10
image: quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
volumes:
- /usr/local/dcmi:/usr/local/dcmi
- /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
@@ -42,13 +42,56 @@ jobs:
--device /dev/hisi_hdc
--privileged
--network "host"
--shm-size 2g
env:
HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
HF_ENDPOINT: "https://hf-mirror.com"
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
steps:
- name: Check npu and CANN info
run: |
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
npu-smi info
- name: Checkout volcengine/verl repo
uses: actions/checkout@v4
- name: Run test
- name: Install torch
run: |
lscpu
pip install torch==2.5.1+cpu --index-url https://download.pytorch.org/whl/cpu
pip install torch-npu==2.5.1
pip install /usr/local/Ascend/ascend-toolkit/latest/lib64/te-0.4.0-py3-none-any.whl
- name: Install vllm
run: |
apt-get update && apt-get install -y git
git clone -b v0.7.3 --depth 1 https://github.com/vllm-project/vllm.git vllm-npu
cd vllm-npu
pip install -r requirements-build.txt
VLLM_TARGET_DEVICE=empty pip install -e . --extra-index-url https://download.pytorch.org/whl/cpu/
- name: Install vllm-ascend
run: |
pip list
pip show torch
git clone -b v0.7.3 --depth 1 https://github.com/vllm-project/vllm-ascend.git
cd vllm-ascend
export COMPILE_CUSTOM_KERNELS=1
python setup.py install
- name: Install the current repository
run: |
pip3 install hf_transfer peft
pip3 install -r requirements-npu.txt
pip install -e .
- name: Prepare gsm8k dataset
run: |
ray stop --force
python3 examples/data_preprocess/gsm8k.py
- name: Running gsm8k e2e training tests with LoRA on ASCEND NPU
run: |
ray stop --force
bash tests/e2e/sft/run_sft.sh
rm -rf $HOME/ckpts
- name: Running gsm8k e2e training tests with GRPO on ASCEND NPU
run: |
ray stop --force
bash tests/npu/run_qwen2_5_05b_grpo.sh
rm -rf $HOME/ckpts
3 changes: 1 addition & 2 deletions .github/workflows/e2e_dapo.yml
@@ -23,7 +23,7 @@ on:
# Megatron
- "!verl/workers/**/megatron_*.py"
# Home
- "recipe/dapo/src"
- "recipe/dapo"
# Entrypoints
- ".github/workflows/e2e_dapo.yml"
- "examples/data_preprocess/gsm8k.py"
@@ -34,7 +34,6 @@ concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}


# Declare permissions just read content.
permissions:
contents: read
58 changes: 56 additions & 2 deletions .github/workflows/e2e_ppo_trainer.yml
@@ -61,7 +61,7 @@ jobs:

e2e_ppo_trainer_vllm:
runs-on: [L20x8]
timeout-minutes: 40 # Increase this timeout value as needed
timeout-minutes: 60 # Increase this timeout value as needed
env:
HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
@@ -148,6 +148,14 @@ jobs:
run: |
ray stop --force
LIGER=True bash tests/e2e/ppo_trainer/run_model_reward.sh
- name: Running GSM8K E2E with rmpad using model rm with Fused Kernel enabled
run: |
ray stop --force
FUSED_KERNELS=True bash tests/e2e/ppo_trainer/run_model_reward.sh
- name: Running GSM8K E2E with rmpad using model rm with Fused Kernel enabled (Triton backend)
run: |
ray stop --force
FUSED_KERNELS=True FUSED_KERNEL_BACKEND=triton bash tests/e2e/ppo_trainer/run_model_reward.sh
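These steps only export environment toggles; the invoked test script is expected to turn them into trainer arguments. A hypothetical sketch of that pattern (the toggle names come from the workflow, but the argument names emitted below are assumed for illustration and are not taken from `run_model_reward.sh`):

```shell
# Hypothetical sketch: translate the CI toggles into trainer CLI arguments.
# FUSED_KERNELS / FUSED_KERNEL_BACKEND match the workflow; the argument
# names echoed below are assumed, not verified against the real script.
build_extra_args() {
  if [ "${FUSED_KERNELS:-False}" = "True" ]; then
    echo "use_fused_kernels=True fused_kernel_backend=${FUSED_KERNEL_BACKEND:-torch}"
  fi
}

# Example: mirrors the workflow step above.
FUSED_KERNELS=True FUSED_KERNEL_BACKEND=triton
build_extra_args   # prints: use_fused_kernels=True fused_kernel_backend=triton
```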

e2e_ppo_trainer_vllm_vlm:
runs-on: [L20x8]
@@ -182,6 +190,27 @@ jobs:
MODEL_ID=Qwen/Qwen2-VL-2B-Instruct \
ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
bash tests/e2e/ppo_trainer/run_function_reward.sh
- name: Running Geo3k VLM E2E with rmpad using fused kernel (Qwen2.5-VL)
run: |
ray stop --force
FUSED_KERNELS=True TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct \
ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
GPU_MEMORY_UTILIZATION=0.6 ACTOR_FSDP_PARAM_OFFLOAD=True \
ACTOR_FSDP_OPTIMIZER_OFFLOAD=True REF_FSDP_PARAM_OFFLOAD=True \
bash tests/e2e/ppo_trainer/run_function_reward.sh
- name: Running Geo3k VLM E2E with rmpad using fused kernel (Qwen2.5-VL, Triton backend)
run: |
ray stop --force
FUSED_KERNELS=True FUSED_KERNEL_BACKEND=triton \
TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct \
ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
GPU_MEMORY_UTILIZATION=0.6 ACTOR_FSDP_PARAM_OFFLOAD=True \
ACTOR_FSDP_OPTIMIZER_OFFLOAD=True REF_FSDP_PARAM_OFFLOAD=True \
bash tests/e2e/ppo_trainer/run_function_reward.sh

e2e_ppo_trainer_sglang:
runs-on: [L20x8]
@@ -269,11 +298,15 @@ jobs:
run: |
ray stop --force
bash tests/e2e/run_gsm8k_fsdp_sgl_multiturn_w_tool.sh
- name: Running GSM8K with tool E2E training tests with FSDP2
run: |
ray stop --force
FSDP_STRATEGY=fsdp2 bash tests/e2e/run_gsm8k_fsdp_sgl_multiturn_w_tool.sh

e2e_ppo_trainer_sglang_vlm:
runs-on: [L20x8]
needs: pre_commit_for_ppo
timeout-minutes: 40 # Increase this timeout value as needed
timeout-minutes: 60 # Increase this timeout value as needed
env:
HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
@@ -305,3 +338,24 @@ jobs:
ENGINE=sglang GPU_MEMORY_UTILIZATION=0.6 ACTOR_FSDP_PARAM_OFFLOAD=True \
ACTOR_FSDP_OPTIMIZER_OFFLOAD=True REF_FSDP_PARAM_OFFLOAD=True \
bash tests/e2e/ppo_trainer/run_function_reward.sh
- name: Running Geo3k VLM E2E with rmpad using fused kernel (Qwen2.5-VL)
run: |
ray stop --force
FUSED_KERNELS=True TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct \
ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
ENGINE=sglang GPU_MEMORY_UTILIZATION=0.6 ACTOR_FSDP_PARAM_OFFLOAD=True \
ACTOR_FSDP_OPTIMIZER_OFFLOAD=True REF_FSDP_PARAM_OFFLOAD=True \
bash tests/e2e/ppo_trainer/run_function_reward.sh
- name: Running Geo3k VLM E2E with rmpad using fused kernel (Qwen2.5-VL, Triton backend)
run: |
ray stop --force
FUSED_KERNELS=True FUSED_KERNEL_BACKEND=triton \
TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct \
ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
ENGINE=sglang GPU_MEMORY_UTILIZATION=0.6 ACTOR_FSDP_PARAM_OFFLOAD=True \
ACTOR_FSDP_OPTIMIZER_OFFLOAD=True REF_FSDP_PARAM_OFFLOAD=True \
bash tests/e2e/ppo_trainer/run_function_reward.sh