Merged
Commits
114 commits
3979d2f
[recipe] feat: add Experimental VLA RL Support (#3918)
The-Hierophant Nov 25, 2025
09a923a
[recipe, data] feat: TransferQueue - Support managing multiple data p…
LLLLxmmm Nov 25, 2025
902cfc4
[ci] feat: Increase e2e_sft timeout from 25 to 30 minutes (#4279)
vermouth1992 Nov 25, 2025
313dfdb
[megatron] feat: Integrate Megatron-Bridge and support LoRA/PEFT (#4063)
HollowMan6 Nov 25, 2025
9886706
[single_controller] feat: support resource_pool split (#4273)
yyDing1 Nov 25, 2025
eb6ef40
[recipe] feat: move recipes to new repository verl-recipe (#4283)
wuxibin89 Nov 25, 2025
146075b
[worker] feat: restore colocate workers based on new splited resource…
yyDing1 Nov 25, 2025
bae6f35
[misc] feat: Add `actor_rollout_ref.actor.calculate_entropy` for entr…
EduardDurech Nov 25, 2025
2a36450
[trainer] feat: Self-Normalized Importance Sampling (#3980)
EduardDurech Nov 25, 2025
3399d39
[ci, megatron] fix: add `rotary_pos_cos_sin` to forward (#4291)
HollowMan6 Nov 26, 2025
d0997d2
[megatron] fix: pass trust_remote_code to get_generation_config (#4196)
jprellberg Nov 26, 2025
c651b7b
[misc] fix: support nested datastructure in dataproto to convert to t…
PeterSH6 Nov 26, 2025
e5243f5
[ci] fix: use local hf model path (#4299)
wuxibin89 Nov 26, 2025
a8a3290
[data] feat: TransferQueue - Support AgentLoop performance metrics & …
0oshowero0 Nov 26, 2025
7dd1245
[recipe] feat: support reward_loop for recipe/fully_async_policy (#4224)
sl-1314 Nov 26, 2025
77ef1db
[misc] fix: fix list conversion in get_tensordict (#4304)
PeterSH6 Nov 26, 2025
9cc9feb
[hardware] fix: Workaround for torch-npu's lack of support for creati…
ji-huazhong Nov 27, 2025
e36433c
[rollout] fix: some compatibility changes in agent loop and reward (#…
pengwu22 Nov 27, 2025
799c931
[worker] fix: do not pass router address and tokenizer is their value…
yyDing1 Nov 27, 2025
f623c14
[doc] chore: Update ascend quickstart doc (#4321)
FightingZhen Nov 27, 2025
3122631
[misc] feat: add more utils of tensordict (#4322)
vermouth1992 Nov 27, 2025
395d7f6
[recipe] fix: Fixed scripts for one_step_off_policy async not impleme…
baymax591 Nov 29, 2025
5a5fe9d
[model] feat: refactor engine folder structure (#4352)
vermouth1992 Nov 29, 2025
648ef53
[recipe] feat: move char count recipe to verl-recipe (#4351)
vermouth1992 Dec 1, 2025
0ce17df
[ci] chore: switch ascend ci calculation resource (#4347)
FightingZhen Dec 1, 2025
6e6ae96
feat(actor): add loss_scale_factor for seq-mean-token-sum-norm mode (…
szrlee Dec 1, 2025
62a9965
[misc] refactor: clean up unused sharding managers (#4361)
ji-huazhong Dec 1, 2025
01eeb49
[worker] feat: Add TrainingWorker that resembles Tinker-like API (#4371)
vermouth1992 Dec 2, 2025
37a01f0
[vllm] fix: Fix issues that occur during the ACLGraph initialization …
chengminhua Dec 2, 2025
9f3199f
[megatron] feat: support gpt-oss (#4323)
ISEEKYAN Dec 2, 2025
9d77200
[megatron] fix: megatron async save ckpt fix (#4253)
Leem-Li Dec 2, 2025
d12d6b3
[misc] feat: Update news section in README.md (#4385)
vermouth1992 Dec 2, 2025
a863f25
[misc] fix: handle empty TensorDict in DataProto serialization (#4379)
le-czs Dec 2, 2025
fb860f0
[trainer,fsdp] feat: enable reproducibility for training (#4378)
ji-huazhong Dec 2, 2025
3d77af6
[trainer] feat: support ray-based sft trainer (#4382)
vermouth1992 Dec 2, 2025
d9ef1e5
[megatron] feat: optimize the mbridge checkpoint saving speed (#4386)
ISEEKYAN Dec 2, 2025
5d00c08
[rollout] feat: add support for discriminative reward model in reward…
yyDing1 Dec 2, 2025
15a9b0f
[recipe] feat: refactor one step off to support server mode (#4307)
ArronHZG Dec 3, 2025
4386937
[misc] feat: support TensorDict in DataProtoFuture (#4395)
vermouth1992 Dec 3, 2025
80c860f
[fsdp] fix: Fixing the error caused by empty tensors in the multi_tur…
nuerxiati Dec 3, 2025
2f2996b
[doc] fix: add Geo-RS-Seq-TIS estimators and update documentation (#4…
szrlee Dec 3, 2025
27d1ada
[worker] feat: custom master addr port (#4389)
tongyx361 Dec 3, 2025
493a397
[doc] feat: update reward loop document (#4404)
yyDing1 Dec 3, 2025
cb23607
[algo] feat: support router replay (#4101)
litianjian Dec 4, 2025
f11af2d
[recipe] fix: FlowRL actor to pure implementation (#4397)
Xuekai-Zhu Dec 4, 2025
9dfed53
[doc] feat: add more user instructions to reward loop doc (#4409)
yyDing1 Dec 4, 2025
a47fa6d
[doc] feat: add OneThinker link in readme (#4410)
appletea233 Dec 4, 2025
57f303c
[ci] fix: NPU not support router replay (#4414)
wuxibin89 Dec 4, 2025
ab07052
[worker] feat: custom reward_manager (#4387)
tongyx361 Dec 4, 2025
fd893c7
[vllm] feat: retires vllm spmd mode in the codebase (#4411)
PeterSH6 Dec 4, 2025
d1d1f03
[sglang] fix: HTTP server startup issues for Prometheus and Grafana i…
jsfanfanfan Dec 5, 2025
51f3190
[doc] chore: Update ascend quickstart and docker build guidance doc (…
FightingZhen Dec 5, 2025
16039d6
[sglang] feat: retires sglang spmd mode in the codebase (#4422)
PeterSH6 Dec 5, 2025
7d44f22
[fsdp] feat: update NPU fused kernels for Qwen3 moe block (#4406)
icerain-alt Dec 5, 2025
5eedbae
[misc] refactor: clean up unused sharding manager (#4439)
ji-huazhong Dec 6, 2025
0b37696
[hardware] chore: clean npu_patch (#4436)
FightingZhen Dec 6, 2025
0d1c100
[misc] fix: fix memory leakage when initializing multiple tools (#4430)
PeterSH6 Dec 6, 2025
d8e97e1
[trainer, vllm, megatron, recipe] feat: one/two step off async on-pol…
moehanabi Dec 6, 2025
a3c417c
[misc] feat: optimize performance of index_select_tensor_dict (#4444)
vermouth1992 Dec 8, 2025
12b2851
[ci] test: Disable ReMax training test in vllm workflow (#4445)
PeterSH6 Dec 8, 2025
5808f4d
[rollout] fix: RolloutConfig should support repetition_penalty config…
Lokiscripter Dec 8, 2025
0fac641
[recipe] feat: add fully async comm between rollout and sim node in d…
HanlinDu Dec 8, 2025
615aa67
[misc] feat: optimize nested tensor index (#4447)
vermouth1992 Dec 8, 2025
9b50fb7
[model] feat: add qwen3-4b grpo script on ASCEND NPU A3 (#4432)
5082459 Dec 8, 2025
7ca9d8a
[megatron] fix: Remove Deprecated Megatron Optimizer Args (#4396)
DaizeDong Dec 8, 2025
6d4fd9a
[megatron] fix: respect `use_distributed_optimizer` in config (#4392)
HollowMan6 Dec 8, 2025
80af9db
[recipe, ci] fix: remove batch mode for remote generative reward mode…
yyDing1 Dec 8, 2025
b96b53a
[misc] feat: optimize rearrange_micro_batches (#4451)
vermouth1992 Dec 8, 2025
95a94e3
[rollout, sglang] feat: support blockwise fp8 rollout (#4415)
Agoniii Dec 8, 2025
932a462
fix conflict upstream
bachvudinh Dec 8, 2025
f332fc8
[trainer] feat: model engine sft trainer support vlm model (#4403)
wuxibin89 Dec 8, 2025
1365848
Merge branch 'main' of github.com:janhq/verl
nguyenhoangthuan99 Dec 8, 2025
0102d04
[trainer] feat: add reward loop config to default config (#4452)
yyDing1 Dec 9, 2025
e0c46e9
[vllm] feat: support abort generating requests in vllm server (#4453)
PeterSH6 Dec 9, 2025
2319bab
[ci] chore: cleanup some ci workflow (#4459)
wuxibin89 Dec 9, 2025
7417d88
[trainer] feat: allow override for reward_manager_worker in agent loo…
ryxli Dec 9, 2025
5617529
[model] feat: enhances TrainingWorker (#4461)
vermouth1992 Dec 9, 2025
aee5aa8
[recipe] feat: Modify the way of obtaining default_runtime_env (#4468)
xichengpro Dec 9, 2025
896db9b
[rollout] fix: mlflow consecutive slashes (#4446)
BaiqingL Dec 10, 2025
d66120d
[fsdp] fix: reward model also reads override config attn_implementati…
pengwu22 Dec 10, 2025
bb75788
[vllm] fix: compatible to vllm0.12 (#4473)
ISEEKYAN Dec 10, 2025
7ffd4fe
[model] feat: support manual control load/offload (#4472)
vermouth1992 Dec 10, 2025
cfcdddf
[ci] feat: Update e2e_ascend to improve CI execution efficiency (#4477)
FightingZhen Dec 10, 2025
5a2e0b1
[ci] fix: Fix e2e_ascend sft test case error (#4481)
FightingZhen Dec 10, 2025
d8195a8
[trainer] feat: support moving ppo actor logics to single controller …
vermouth1992 Dec 11, 2025
01ab536
[megatron] fix: correct typo in modeling_qwen2_megatron.py (#4486)
study8677 Dec 11, 2025
3824689
[fsdp] fix: qwen3vlmoe with Monkey patch to fix a bug in transformers…
pengyanai Dec 12, 2025
7deb67c
[ci] fix: fix format check error (#4506)
ji-huazhong Dec 13, 2025
392791b
[hardware] feat: Auto set device_name to npu for Ascend NPU (#4489)
FightingZhen Dec 15, 2025
7eb030e
[trainer] feat: make reward loop disrm default (#4466)
yyDing1 Dec 15, 2025
518bada
[algo,doc] refactor: rollout correction (#4511)
szrlee Dec 15, 2025
290b522
[trainer] feat: enable model engine based critic (#4507)
vermouth1992 Dec 15, 2025
da214ab
[vllm, rollout] feat: support reset prefix cache after abort (#4519)
PeterSH6 Dec 15, 2025
ec14a87
[ci] chore: remove proxy settings in e2e_ascend (#4527)
FightingZhen Dec 15, 2025
36c2d4a
[rollout] fix: correct heap-based load balancing in AsyncLLMServerMan…
hellcatCS Dec 15, 2025
5922445
[sglang, rollout] feat: delete remaining sglang spmd code (#4523)
PeterSH6 Dec 15, 2025
baf3a63
[data] feat: TransferQueue - Add zero-copy serialization support & us…
0oshowero0 Dec 15, 2025
ebec85d
[rollout] feat: pass agent_data to tool calling (#4469)
wuxibin89 Dec 16, 2025
2fd6591
[megatron,ci] chore: update instructions and scripts for LoRA (#4533)
HollowMan6 Dec 16, 2025
d7c82bd
[megatron] chore: clean legacy code path part 1, make engine use mbri…
ISEEKYAN Dec 16, 2025
a0e8e44
[megatron] chore: clean legacy code path part 2, clean legacy CI (#4529)
ISEEKYAN Dec 16, 2025
fdf0046
[trainer] fix: model engine vlm multi_modal_inputs to NonTensorStack …
wuxibin89 Dec 16, 2025
7cb647d
[ray] chore: Update Ray version dependency in requirements-npu.txt (#…
FightingZhen Dec 16, 2025
a07556d
[ci] chore: migrate all rm related ci to reward loop (#4520)
yyDing1 Dec 17, 2025
6a58521
[algo] fix: Add seq mean mask denominator option (#4510)
szrlee Dec 17, 2025
379f296
[trainer] fix: change name for reward loop worker override (#4549)
ryxli Dec 17, 2025
b5d6a30
[rollout,vllm] feat: disable sleep mode in fully-async mode (#4521)
chenjiaoAngel Dec 17, 2025
022c0ae
[rollout, trainer] feat: extend agent loop for custom implementations…
JoyboyBrian Dec 17, 2025
1ae510c
[rollout] chore: update reward loop file names (#4547)
yyDing1 Dec 17, 2025
4bf590e
[ci] fix: Add mbridge dependency into e2e_ascend (#4560)
FightingZhen Dec 17, 2025
3146164
[doc] feat: add JupyterLab plugin instructions (#4536)
yqsstudy Dec 17, 2025
646fb4a
[ci] feat: Increase e2e_sft timeout from 30 to 40 minutes (#4552)
vermouth1992 Dec 17, 2025
16a6c47
[misc] chore: add "reward" tag to PR template (#4573)
yyDing1 Dec 17, 2025
bdbb085
sync upstream
bachvudinh Dec 18, 2025
2 changes: 1 addition & 1 deletion .github/PULL_REQUEST_TEMPLATE.md
@@ -6,7 +6,7 @@

- [ ] Search for similar PRs. Paste at least one query link here: ...
- [ ] Format the PR title as `[{modules}] {type}: {description}` (This will be checked by the CI)
- `{modules}` include `fsdp`, `megatron`, `sglang`, `vllm`, `rollout`, `trainer`, `ci`, `training_utils`, `recipe`, `hardware`, `deployment`, `ray`, `worker`, `single_controller`, `misc`, `perf`, `model`, `algo`, `env`, `tool`, `ckpt`, `doc`, `data`
- `{modules}` include `fsdp`, `megatron`, `sglang`, `vllm`, `rollout`, `trainer`, `ci`, `training_utils`, `recipe`, `hardware`, `deployment`, `ray`, `worker`, `single_controller`, `misc`, `perf`, `model`, `algo`, `env`, `tool`, `ckpt`, `doc`, `data`, `cfg`, `reward`
- If this PR involves multiple modules, separate them with `,` like `[megatron, fsdp, doc]`
- `{type}` is in `feat`, `fix`, `refactor`, `chore`, `test`
- If this PR breaks any API (CLI arguments, config, function signature, etc.), add `[BREAKING]` to the beginning of the title.
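The title convention in the template above can be sketched as a local pre-push check. This is a hypothetical helper, not part of the repo's CI (which does its own check); the module and type lists are the ones from the updated template, including the newly added `cfg` and `reward` modules:

```shell
#!/usr/bin/env bash
# Hypothetical local check for the verl PR title convention
# "[{modules}] {type}: {description}", with an optional "[BREAKING] " prefix.
check_pr_title() {
  local title="$1"
  local modules='fsdp|megatron|sglang|vllm|rollout|trainer|ci|training_utils|recipe|hardware|deployment|ray|worker|single_controller|misc|perf|model|algo|env|tool|ckpt|doc|data|cfg|reward'
  local types='feat|fix|refactor|chore|test'
  # One or more comma-separated modules, a known type, a colon, a description.
  local re="^(\[BREAKING\] )?\[($modules)(, ($modules))*\] ($types): .+"
  [[ "$title" =~ $re ]]
}

check_pr_title '[megatron, fsdp, doc] feat: add LoRA support' && echo ok
check_pr_title 'bad title' || echo rejected
```

Running the two sample calls prints `ok` and `rejected`, matching the "checked by the CI" rule the checklist describes.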
175 changes: 0 additions & 175 deletions .github/workflows/checkpoint_converter.yml

This file was deleted.

137 changes: 109 additions & 28 deletions .github/workflows/e2e_ascend.yml
@@ -65,22 +65,24 @@ permissions:
contents: read

jobs:
test:
non_rl_job:
if: github.repository_owner == 'volcengine'
name: verl Ascend test (self-host)
runs-on: linux-aarch64-a2-8
timeout-minutes: 60 # Increase this timeout value as needed
name: E2E Ascend testing for non-RL algorithm scenarios
runs-on: linux-aarch64-a2-2
timeout-minutes: 60
container:
image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.3.rc1-910b-ubuntu22.04-py3.11-latest
options: >-
--shm-size 16g
env:
HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
HF_ENDPOINT: "https://hf-mirror.com"
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
steps:
- name: Config third-party dependency download cache
run: |
sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
- name: Check npu and CANN info
run: |
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
@@ -103,47 +105,126 @@ jobs:
- name: Preprocess gsm8k dataset
run: |
python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k
- name: Preprocess geo3k dataset
run: |
python examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/.cache/datasets/hiyouga/geometry3k
- name: Running gsm8k e2e qwen3 training tests with PPO on ASCEND NPU
run: |
ray stop --force
bash tests/special_npu/run_qwen3_06b_ppo.sh
rm -rf $HOME/ckpts
- name: Running gsm8k e2e training tests with peft sft on ASCEND NPU
run: |
ray stop --force
bash tests/special_npu/run_qwen2_5_05b_sft_peft_sp2.sh
rm -rf $HOME/ckpts
- name: Running gsm8k e2e training tests with GRPO on ASCEND NPU
- name: Running NPU profiling unit tests
run: |
ray stop --force
bash tests/special_npu/run_qwen2_5_05b_grpo.sh
rm -rf $HOME/ckpts
- name: Running geo3k e2e training tests with GRPO on ASCEND NPU
pytest -s -x tests/utils/test_special_mstx_profile.py

llm_rl_job:
if: github.repository_owner == 'volcengine'
name: E2E Ascend testing for RL training scenarios of LLM models
runs-on: linux-aarch64-a2-8
timeout-minutes: 60
container:
image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.3.rc1-910b-ubuntu22.04-py3.11-latest
options: >-
--shm-size 16g
env:
HF_ENDPOINT: "https://hf-mirror.com"
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
steps:
- name: Config third-party dependency download cache
run: |
sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
- name: Check npu and CANN info
run: |
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
npu-smi info
- name: Check initial pip list from image
run: |
pip list
- name: Checkout volcengine/verl repo
uses: actions/checkout@v4
with:
fetch-depth: 0
clean: true
- name: Install the current repository
run: |
pip install -r requirements-npu.txt
pip install -e .
- name: Check final pip list
run: |
pip list
- name: Preprocess gsm8k dataset
run: |
python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k
- name: Running gsm8k e2e training tests with PPO on ASCEND NPU (FSDP backend)
run: |
ray stop --force
bash tests/special_npu/run_qwen2_5_vl_3b_npu.sh
bash tests/special_npu/run_qwen3_06b_ppo.sh
rm -rf $HOME/ckpts
- name: Running gsm8k e2e training tests with DAPO on ASCEND NPU
- name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (FSDP backend)
run: |
ray stop --force
bash tests/special_npu/run_qwen2_5_05b_dapo.sh
bash tests/special_npu/run_qwen2_5_05b_grpo.sh
rm -rf $HOME/ckpts
- name: Running gsm8k e2e qwen3 MoE training tests with DAPO MindSpeed on ASCEND NPU
- name: Running gsm8k e2e training tests with DAPO on ASCEND NPU (FSDP backend)
run: |
ray stop --force
export PYTHONPATH=$PYTHONPATH:/Megatron-LM
USE_DIST_CKPT=True USE_DUMMY_MODEL=True DUMMY_MODEL_CONFIG_PATH=tests/special_e2e/ppo_trainer/expert_parallel/qwen3moe_minimal.json DUMMY_MODEL_PATH=$HOME/dist_ckpt/qwen3_30b_dapo_mindspeed bash tests/special_npu/run_qwen3_30b_dapo_mindspeed.sh
- name: Running gsm8k e2e training tests with GRPO MindSpeed on ASCEND NPU
bash tests/special_npu/run_qwen2_5_05b_dapo.sh
rm -rf $HOME/ckpts
- name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (MindSpeed backend)
run: |
ray stop --force
export PYTHONPATH=$PYTHONPATH:/Megatron-LM
USE_DIST_CKPT=True bash tests/special_npu/run_qwen2_5_05b_grpo_mindspeed.sh
rm -rf $HOME/dist_ckpt/qwen2_5_05b_grpo_mindspeed
rm -rf $HOME/ckpts
- name: Running NPU profiling unit tests
- name: Running gsm8k e2e training tests with DAPO on ASCEND NPU (MindSpeed backend, MoE Model)
run: |
ray stop --force
pytest -s -x tests/utils/test_special_mstx_profile.py
export PYTHONPATH=$PYTHONPATH:/Megatron-LM
USE_DIST_CKPT=True USE_DUMMY_MODEL=True DUMMY_MODEL_CONFIG_PATH=tests/special_e2e/ppo_trainer/expert_parallel/qwen3moe_minimal.json DUMMY_MODEL_PATH=$HOME/dist_ckpt/qwen3_30b_dapo_mindspeed bash tests/special_npu/run_qwen3_30b_dapo_mindspeed.sh

vlm_rl_job:
if: github.repository_owner == 'volcengine'
name: E2E Ascend testing for RL training scenarios of VLM models
runs-on: linux-aarch64-a2-8
timeout-minutes: 60
container:
image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.3.rc1-910b-ubuntu22.04-py3.11-latest
options: >-
--shm-size 16g
env:
HF_ENDPOINT: "https://hf-mirror.com"
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
steps:
- name: Config third-party dependency download cache
run: |
sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
- name: Check npu and CANN info
run: |
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
npu-smi info
- name: Check initial pip list from image
run: |
pip list
- name: Checkout volcengine/verl repo
uses: actions/checkout@v4
with:
fetch-depth: 0
clean: true
- name: Install the current repository
run: |
pip install -r requirements-npu.txt
pip install -e .
- name: Check final pip list
run: |
pip list
- name: Preprocess geo3k dataset
run: |
python examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/.cache/datasets/hiyouga/geometry3k
- name: Running geo3k e2e training tests with GRPO on ASCEND NPU
run: |
ray stop --force
bash tests/special_npu/run_qwen2_5_vl_3b_npu.sh
rm -rf $HOME/ckpts
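Each new job above starts with the same "Config third-party dependency download cache" step, which replaces the public Ubuntu and PyPI mirrors with an in-cluster cache service so the self-hosted runners avoid external traffic. A minimal sketch of the rewrite it performs (the cache hostname is the one from the workflow; the helper function itself is hypothetical):

```shell
#!/usr/bin/env bash
# Hypothetical helper mirroring the workflow's cache-redirection step:
# rewrite Ubuntu mirror hostnames in a sources.list body to the in-cluster
# cache, reading from stdin instead of editing /etc/apt/sources.list in place.
redirect_to_cache() {
  sed -E 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g'
}

echo "deb http://ports.ubuntu.com/ubuntu-ports jammy main" | redirect_to_cache
# prints: deb http://cache-service.nginx-pypi-cache.svc.cluster.local:8081/ubuntu-ports jammy main
```

The workflow applies the same substitution with `sed -Ei` directly to `/etc/apt/sources.list`, then points pip at the cache's PyPI index via `pip config set global.index-url` and marks the host trusted (it is served over plain HTTP inside the cluster).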