diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml index cf88963a325..a24b1cf4687 100644 --- a/.github/workflows/_e2e_test.yaml +++ b/.github/workflows/_e2e_test.yaml @@ -228,6 +228,14 @@ jobs: #pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_shared_expert_dp.py pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_single_request_aclgraph.py + - name: Run vllm-project/vllm-ascend test (non triton) + if: ${{ inputs.type == 'full' }} + env: + VLLM_WORKER_MULTIPROC_METHOD: spawn + run: | + python3 -m pip uninstall -y triton-ascend + pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_aclgraph_capture_replay.py + e2e-4-cards: name: multicard-4 needs: [e2e-2-cards] diff --git a/Dockerfile b/Dockerfile index aadb6ea2cdd..2c43ba8b03c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -64,6 +64,14 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \ python3 -m pip cache purge +# Install clang-15 (for triton-ascend) +RUN apt-get update -y && \ + apt-get -y install clang-15 && \ + update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 && \ + update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 && \ + rm -rf /var/cache/apt/* && \ + rm -rf /var/lib/apt/lists/* + # Install modelscope (for fast download) and ray (for multinode) RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \ python3 -m pip cache purge diff --git a/Dockerfile.a3 b/Dockerfile.a3 index 960e7ef563a..c6b48709e24 100644 --- a/Dockerfile.a3 +++ b/Dockerfile.a3 @@ -63,6 +63,14 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \ python3 -m pip cache purge +# Install clang-15 (for triton-ascend) +RUN apt-get update 
-y && \ + apt-get -y install clang-15 && \ + update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 && \ + update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 && \ + rm -rf /var/cache/apt/* && \ + rm -rf /var/lib/apt/lists/* + # Install modelscope (for fast download) and ray (for multinode) RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \ python3 -m pip cache purge diff --git a/Dockerfile.a3.openEuler b/Dockerfile.a3.openEuler index f5c83bfe9eb..75896e8a0d9 100644 --- a/Dockerfile.a3.openEuler +++ b/Dockerfile.a3.openEuler @@ -66,6 +66,11 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \ python3 -m pip cache purge +# Install clang (for triton-ascend) +RUN yum update -y && \ + yum install -y clang && \ + rm -rf /var/cache/yum/* + # Install modelscope (for fast download) and ray (for multinode) RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \ python3 -m pip cache purge diff --git a/Dockerfile.openEuler b/Dockerfile.openEuler index c93cddf0b4c..e634a3e95c2 100644 --- a/Dockerfile.openEuler +++ b/Dockerfile.openEuler @@ -66,6 +66,11 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \ python3 -m pip cache purge +# Install clang (for triton-ascend) +RUN yum update -y && \ + yum install -y clang && \ + rm -rf /var/cache/yum/* + # Install modelscope (for fast download) and ray (for multinode) RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \ python3 -m pip cache purge diff --git a/docs/source/tutorials/DeepSeek-V3.2.md b/docs/source/tutorials/DeepSeek-V3.2.md index af42abe6a48..a20154d2554 100644 --- 
a/docs/source/tutorials/DeepSeek-V3.2.md +++ b/docs/source/tutorials/DeepSeek-V3.2.md @@ -31,18 +31,6 @@ If you want to deploy multi-node environment, you need to verify multi-node comm You can using our official docker image to run `DeepSeek-V3.2` directly.. -:::{note} -We strongly recommend you to install clang make triton ascend stable enough. For Ubuntu, the command is - -```bash -apt-get -y clang-15 - -update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 -update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 -``` - -::: - :::::{tab-set} :sync-group: install diff --git a/docs/source/tutorials/Qwen3-235B-A22B.md b/docs/source/tutorials/Qwen3-235B-A22B.md index 0298dab70a0..64ff19b8c14 100644 --- a/docs/source/tutorials/Qwen3-235B-A22B.md +++ b/docs/source/tutorials/Qwen3-235B-A22B.md @@ -326,15 +326,7 @@ In this section, we provide simple scripts to re-produce our latest performance. - CANN 8.3.RC2 - torch_npu 2.8.0 - HDK/driver 25.3.RC1 -- triton_ascend 3.2.0.dev2025110717 - -**Notice:** -triton_ascend is required for reproducing best performance of Qwen3-235B in vLLM-Ascend. If it is not installed in your environment, please follow the instructions below: - -```bash -wget https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl -pip install triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl -``` +- triton_ascend 3.2.0 ### Single Node A3 (64G*16) diff --git a/docs/source/tutorials/Qwen3-Next.md b/docs/source/tutorials/Qwen3-Next.md index 74341dd6064..043ae359471 100644 --- a/docs/source/tutorials/Qwen3-Next.md +++ b/docs/source/tutorials/Qwen3-Next.md @@ -53,17 +53,6 @@ docker run --rm \ The Qwen3 Next is using [Triton Ascend](https://gitee.com/ascend/triton-ascend) which is currently experimental. In future versions, there may be behavioral changes related to stability, accuracy, and performance improvement. 
-### Install Clang - -We strongly recommend you to install clang make triton ascend stable enough. For Ubuntu, the command is - -```bash -apt-get -y clang-15 - -update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 -update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 -``` - -### Inference :::::{tab-set} diff --git a/tests/e2e/nightly/multi_node/scripts/run.sh b/tests/e2e/nightly/multi_node/scripts/run.sh index 95a7b9dc4e3..f42b325e04a 100644 --- a/tests/e2e/nightly/multi_node/scripts/run.sh +++ b/tests/e2e/nightly/multi_node/scripts/run.sh @@ -125,13 +125,12 @@ install_extra_components() { echo "====> Extra components installation completed" } -install_clang() { - echo "====> Installing clang-15" - apt-get update && apt-get install -y clang-15 - update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 - update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 + +show_triton_ascend_info() { + echo "====> Check triton ascend info" clang -v - echo "====> Clang-15 installation completed" + command -v bishengir-compile + python3 -m pip show triton-ascend } kill_npu_processes() { @@ -161,7 +160,7 @@ main() { check_npu_info check_and_config show_vllm_info - install_clang + show_triton_ascend_info if [[ "$CONFIG_YAML_PATH" == *"DeepSeek-V3_2-Exp-bf16.yaml" ]]; then install_extra_components fi