Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/workflows/_e2e_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,14 @@ jobs:
#pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_shared_expert_dp.py
pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_single_request_aclgraph.py

# Uninstalls triton-ascend and then runs the ACL graph capture/replay test
# without it. The step only runs when inputs.type == 'full'.
# NOTE(review): the uninstall is not reverted here, so any step placed after
# this one in the same job would presumably also run without triton-ascend.
- name: Run vllm-project/vllm-ascend test (non triton)
if: ${{ inputs.type == 'full' }}
env:
VLLM_WORKER_MULTIPROC_METHOD: spawn
run: |
python3 -m pip uninstall -y triton-ascend
pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_aclgraph_capture_replay.py

e2e-4-cards:
name: multicard-4
needs: [e2e-2-cards]
Expand Down
8 changes: 8 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,14 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
python3 -m pip cache purge

# Install clang-15 (for triton-ascend).
# After installing the package, clang-15 and clang++-15 are registered through
# update-alternatives (priority 20) as the providers of /usr/bin/clang and
# /usr/bin/clang++. The apt cache and package lists are removed in the same RUN
# so they are not kept in this image layer.
RUN apt-get update -y && \
apt-get -y install clang-15 && \
update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 && \
update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 && \
rm -rf /var/cache/apt/* && \
rm -rf /var/lib/apt/lists/*

# Install modelscope (for fast download) and ray (for multinode)
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
python3 -m pip cache purge
Expand Down
8 changes: 8 additions & 0 deletions Dockerfile.a3
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,14 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
python3 -m pip cache purge

# Install clang-15 (for triton-ascend).
# After installing the package, clang-15 and clang++-15 are registered through
# update-alternatives (priority 20) as the providers of /usr/bin/clang and
# /usr/bin/clang++. The apt cache and package lists are removed in the same RUN
# so they are not kept in this image layer.
RUN apt-get update -y && \
apt-get -y install clang-15 && \
update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 && \
update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 && \
rm -rf /var/cache/apt/* && \
rm -rf /var/lib/apt/lists/*

# Install modelscope (for fast download) and ray (for multinode)
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
python3 -m pip cache purge
Expand Down
5 changes: 5 additions & 0 deletions Dockerfile.a3.openEuler
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,11 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
python3 -m pip cache purge

# Install clang (for triton-ascend) with yum, then clear the yum cache in the
# same RUN so it is not kept in this image layer.
# NOTE(review): `yum update -y` upgrades all installed packages, not only the
# repository metadata — confirm that a full upgrade is intended here.
# NOTE(review): no specific clang version is requested; the installed version
# depends on what the configured repositories provide.
RUN yum update -y && \
yum install -y clang && \
rm -rf /var/cache/yum/*

# Install modelscope (for fast download) and ray (for multinode)
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
python3 -m pip cache purge
Expand Down
5 changes: 5 additions & 0 deletions Dockerfile.openEuler
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,11 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
python3 -m pip cache purge

# Install clang (for triton-ascend) with yum, then clear the yum cache in the
# same RUN so it is not kept in this image layer.
# NOTE(review): `yum update -y` upgrades all installed packages, not only the
# repository metadata — confirm that a full upgrade is intended here.
# NOTE(review): no specific clang version is requested; the installed version
# depends on what the configured repositories provide.
RUN yum update -y && \
yum install -y clang && \
rm -rf /var/cache/yum/*

# Install modelscope (for fast download) and ray (for multinode)
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
python3 -m pip cache purge
Expand Down
12 changes: 0 additions & 12 deletions docs/source/tutorials/DeepSeek-V3.2.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,6 @@ If you want to deploy multi-node environment, you need to verify multi-node comm

You can use our official docker image to run `DeepSeek-V3.2` directly.

:::{note}
We strongly recommend installing clang to make triton-ascend stable enough. On Ubuntu, the commands are:

```bash
apt-get install -y clang-15

update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20
update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20
```

:::

:::::{tab-set}
:sync-group: install

Expand Down
10 changes: 1 addition & 9 deletions docs/source/tutorials/Qwen3-235B-A22B.md
Original file line number Diff line number Diff line change
Expand Up @@ -326,15 +326,7 @@ In this section, we provide simple scripts to re-produce our latest performance.
- CANN 8.3.RC2
- torch_npu 2.8.0
- HDK/driver 25.3.RC1
- triton_ascend 3.2.0.dev2025110717

**Notice:**
triton_ascend is required to reproduce the best performance of Qwen3-235B in vLLM-Ascend. If it is not installed in your environment, please follow the instructions below:

```bash
wget https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl
pip install triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl
```
- triton_ascend 3.2.0

### Single Node A3 (64G*16)

Expand Down
11 changes: 0 additions & 11 deletions docs/source/tutorials/Qwen3-Next.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,17 +53,6 @@ docker run --rm \

Qwen3 Next uses [Triton Ascend](https://gitee.com/ascend/triton-ascend), which is currently experimental. Future versions may bring behavioral changes related to stability, accuracy, and performance.

### Install Clang

We strongly recommend installing clang to make triton-ascend stable enough. On Ubuntu, the commands are:

```bash
apt-get install -y clang-15

update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20
update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20
```

### Inference

:::::{tab-set}
Expand Down
13 changes: 6 additions & 7 deletions tests/e2e/nightly/multi_node/scripts/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -125,13 +125,12 @@ install_extra_components() {
echo "====> Extra components installation completed"
}

install_clang() {
echo "====> Installing clang-15"
apt-get update && apt-get install -y clang-15
update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20
update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20

show_triton_ascend_info() {
echo "====> Check triton ascend info"
clang -v
echo "====> Clang-15 installation completed"
which bishengir-compile
pip show triton-ascend
}

kill_npu_processes() {
Expand Down Expand Up @@ -161,7 +160,7 @@ main() {
check_npu_info
check_and_config
show_vllm_info
install_clang
show_triton_ascend_info
if [[ "$CONFIG_YAML_PATH" == *"DeepSeek-V3_2-Exp-bf16.yaml" ]]; then
install_extra_components
fi
Expand Down