Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
508a101
add deepseek-r1-w8a8 tutorial.
Gongdayao Nov 27, 2025
9b8c81a
update deepseek-r1-w8a8 tutorial.
Gongdayao Nov 27, 2025
43f36a5
[BugFix] Adapted Qwen3-Next eager mode to v0.11.2 (#4477)
drslark Nov 27, 2025
2811167
[bugfix] fix ray start failed: local_world_size cannot little than vi…
leo-pony Nov 27, 2025
7a75bc1
[feature] Add Custom Op grouped_matmul_swiglu_quant (#4431)
SlightwindSec Nov 27, 2025
4a9763d
[TEST] Add eagle proposer ut (#4447)
GDzhu01 Nov 27, 2025
7af4a8c
[main]Upgrade cann to 8.3rc2 (#4350)
MrZ20 Nov 28, 2025
f304263
[Quantization] Support compressed tensors w8a8 static and w8a8 dynami…
LHXuuu Nov 28, 2025
c7c0cfc
[MM][Model][Perf] Remove Qwen2.5-VL modeling files and add patch for …
shen-shanshan Nov 28, 2025
f01cb6b
[P/D] Add readme for PD separation (#4182)
wangxiaoteng888 Nov 28, 2025
3f014b3
[Doc]Delete equals sign (#4537)
herizhen Nov 28, 2025
44ad9d2
[Kernel] add custom op GmmSwigluQuantWeightNzTensorList (#3804)
ChenxiQ Nov 28, 2025
461c1db
[Feature][main]reconstruction kvpool connector to ascend connector (#…
fems14 Nov 28, 2025
303a29c
【OPS】qwen3-next support triton chunk_gated_delta_rule ops (#4070)
shiyuan680 Nov 28, 2025
c91235d
update triton package url (#4552)
wangxiyuan Nov 28, 2025
8a34496
[Bugfix] Fix model run _npu_flash_attention hang issue (#4410)
Semmer2 Nov 29, 2025
247e1b9
update gpqadataset accuracy in Deepseek-R1-w8a8 tutorial and file nam…
Gongdayao Nov 29, 2025
8c536af
update DeepSeek-R1-w8a8.md to resove PyMarkdown failded
Gongdayao Nov 29, 2025
de89b57
update Deepseek-R1-w8a8 to Deepseek-R1
Gongdayao Nov 29, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/Dockerfile.buildwheel
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# This file is a part of the vllm-ascend project.
#
ARG PY_VERSION=3.11
FROM quay.io/ascend/manylinux:8.3.rc1-910b-manylinux_2_28-py${PY_VERSION}
FROM quay.io/ascend/manylinux:8.3.rc2-910b-manylinux_2_28-py${PY_VERSION}

ARG COMPILE_CUSTOM_KERNELS=1
ARG SOC_VERSION
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/_e2e_nightly_multi_node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ on:
required: false
type: string
description: base image for pods
default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11"
default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11"
config_file_path:
required: true
type: string
Expand Down Expand Up @@ -69,7 +69,7 @@ jobs:
# This is the runner with no NPU for k8s controller
runs-on: ${{ inputs.runner }}
container:
image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11
image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
env:
KUBECONFIG: /tmp/kubeconfig
KUBECTL: /root/.cache/.kube/kubectl
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/_e2e_nightly_single_node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ on:
image:
required: false
type: string
default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11"
default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11"
tests:
required: true
type: string
Expand Down
9 changes: 3 additions & 6 deletions .github/workflows/_e2e_nightly_single_node_models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ jobs:
name: ${{inputs.model_list}} accuracy test
runs-on: ${{ inputs.runner }}
container:
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
env:
VLLM_USE_MODELSCOPE: True
GHA_VLLM_ASCEND_VERSION: ${{ inputs.vllm-ascend }}
Expand Down Expand Up @@ -108,10 +108,7 @@ jobs:
if: ${{ inputs.runner == 'linux-aarch64-a2-4' && contains(inputs.model_list, 'Qwen3-Next-80B-A3B-Instruct') }}
shell: bash -l {0}
run: |
wget -q https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/Ascend-BiSheng-toolkit_aarch64.run -O /tmp/Ascend-BiSheng-toolkit_aarch64.run
chmod a+x /tmp/Ascend-BiSheng-toolkit_aarch64.run
/tmp/Ascend-BiSheng-toolkit_aarch64.run --install
. /usr/local/Ascend/8.3.RC1/bisheng_toolkit/set_env.sh
. /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev20250914-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl"

- name: Resolve vllm-ascend version
Expand Down Expand Up @@ -225,4 +222,4 @@ jobs:
path: ./benchmarks/accuracy/
if-no-files-found: warn
retention-days: 90
overwrite: true
overwrite: true
12 changes: 5 additions & 7 deletions .github/workflows/_e2e_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ jobs:
VLLM_USE_MODELSCOPE: True
if: ${{ inputs.type == 'full' }}
run: |
pytest -sv tests/e2e/multicard/test_quantization.py
pytest -sv tests/e2e/multicard/test_aclgraph_capture_replay.py
pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
pytest -sv tests/e2e/multicard/test_full_graph_mode.py
Expand Down Expand Up @@ -211,7 +212,7 @@ jobs:
if: ${{ needs.e2e.result == 'success' && needs.e2e-2-cards.result == 'success' && inputs.type == 'full' }}
runs-on: linux-aarch64-a3-4
container:
image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11
image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
env:
VLLM_LOGGING_LEVEL: ERROR
VLLM_USE_MODELSCOPE: True
Expand Down Expand Up @@ -274,11 +275,8 @@ jobs:
- name: Install Ascend toolkit & triton_ascend (for Qwen3-Next-80B-A3B-Instruct)
shell: bash -l {0}
run: |
wget -q https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/Ascend-BiSheng-toolkit_aarch64.run -O /tmp/Ascend-BiSheng-toolkit_aarch64.run
chmod a+x /tmp/Ascend-BiSheng-toolkit_aarch64.run
/tmp/Ascend-BiSheng-toolkit_aarch64.run --install
. /usr/local/Ascend/8.3.RC1/bisheng_toolkit/set_env.sh
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev20250914-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl"
. /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27.whl"

- name: Run vllm-project/vllm-ascend Qwen3 Next test
working-directory: ./vllm-ascend
Expand All @@ -287,5 +285,5 @@ jobs:
VLLM_WORKER_MULTIPROC_METHOD: spawn
VLLM_USE_MODELSCOPE: True
run: |
. /usr/local/Ascend/8.3.RC1/bisheng_toolkit/set_env.sh
. /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
pytest -sv tests/e2e/multicard/test_qwen3_next.py
2 changes: 1 addition & 1 deletion .github/workflows/_nightly_image_build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ jobs:
--network host \
--platform linux/arm64 \
-f .github/Dockerfile.nightly.${TARGET} \
--build-arg CANN_VERSION="8.3.rc1" \
--build-arg CANN_VERSION="8.3.rc2" \
--build-arg UBUNTU_VERSION="22.04" \
--build-arg PYTHON_VERSION="3.11" \
-t "$IMAGE_TAG" .
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/nightly_benchmarks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ jobs:
vllm_ascend_branch: main
max-parallel: 1
container:
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
volumes:
- /usr/local/dcmi:/usr/local/dcmi
- /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
Expand Down
6 changes: 6 additions & 0 deletions .github/workflows/release_whl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,14 @@ jobs:
--exclude libge_common_base.so \
--exclude libc10.so \
--exclude libc_sec.so \
--exclude libnnopbase.so \
--exclude libprofapi.so \
--exclude libgraph_base.so \
--exclude libgraph.so \
--exclude libexe_graph.so \
--exclude "libascend*.so" \
--exclude "libtorch*.so" \
--exclude "libopapi.so" \
--exclude "liberror_manager.so"
done
rm -f dist/*.whl
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/vllm_ascend_test_310p.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ jobs:
runs-on: ${{ matrix.os }}
container:
# TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-310p-ubuntu22.04-py3.11
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-310p-ubuntu22.04-py3.11
env:
VLLM_LOGGING_LEVEL: ERROR
VLLM_USE_MODELSCOPE: True
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/vllm_ascend_test_full_vllm_main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,5 @@ jobs:
with:
vllm: main
runner: linux-aarch64-a2
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
type: full
2 changes: 1 addition & 1 deletion .github/workflows/vllm_ascend_test_pr_full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,5 +76,5 @@ jobs:
with:
vllm: ${{ matrix.vllm_version }}
runner: linux-aarch64-a2
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
type: full
6 changes: 3 additions & 3 deletions .github/workflows/vllm_ascend_test_pr_light.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ jobs:
if: ${{ needs.lint.result == 'success' && (needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.ut_tracker == 'true') }}
runs-on: ubuntu-latest
container:
# fixme: vllm-ascend install failed with 8.3.rc1 on github action
image: quay.io/ascend/cann:8.2.rc1-910b-ubuntu22.04-py3.11
# fixme: vllm-ascend install failed with 8.3.rc2 on github action
image: quay.io/ascend/cann:8.2.rc2-910b-ubuntu22.04-py3.11
env:
VLLM_LOGGING_LEVEL: ERROR
VLLM_USE_MODELSCOPE: True
Expand Down Expand Up @@ -151,5 +151,5 @@ jobs:
with:
vllm: ${{ matrix.vllm_version }}
runner: linux-aarch64-a2
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
type: light
2 changes: 1 addition & 1 deletion .github/workflows/vllm_ascend_test_report.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ jobs:
with:
vllm: v0.11.2
runner: ${{ matrix.runner }}
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
model_list: ${{ toJson(matrix.model_list) }}
upload: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.vllm-ascend-version == 'latest' }}

Expand Down
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ repos:
- id: codespell
args: [
--toml, pyproject.toml,
'--skip', 'tests/e2e/multicard/test_torchair_graph_mode.py,csrc/mla_preprocess/**,tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**,.github/**,typos.toml',
'--skip', 'tests/e2e/multicard/test_torchair_graph_mode.py,csrc/**,tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**,.github/**,typos.toml',
'-L', 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn,ArchType,AND'
]
additional_dependencies:
Expand All @@ -37,7 +37,7 @@ repos:
- id: typos
args: [
"--force-exclude",
"--exclude", "csrc/mla_preprocess/**"
"--exclude", "csrc/**"
]
- repo: https://github.com/PyCQA/isort
rev: 6.0.1
Expand Down
8 changes: 6 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ ascendc_library(vllm_ascend_kernels SHARED
message("TORCH_NPU_PATH is ${TORCH_NPU_PATH}")

file(GLOB VLLM_ASCEND_SRC
${CMAKE_CURRENT_SOURCE_DIR}/csrc/*.cpp)
${CMAKE_CURRENT_SOURCE_DIR}/csrc/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/csrc/aclnn_torch_adapter/*.cpp)

include_directories(
${pybind11_INCLUDE_DIRS}
Expand All @@ -81,13 +82,15 @@ set(
${TORCH_NPU_INCLUDE_DIRS}
${ASCEND_HOME_PATH}/include
${ASCEND_HOME_PATH}/aarch64-linux/include/experiment/platform
${ASCEND_HOME_PATH}/x86_64-linux/include/experiment/platform
)

pybind11_add_module(vllm_ascend_C ${VLLM_ASCEND_SRC})

target_link_directories(
vllm_ascend_C
PRIVATE
${TORCH_LIBRARY_DIRS}
${TORCH_NPU_PATH}/lib/
${ASCEND_HOME_PATH}/lib64
)
Expand All @@ -96,14 +99,15 @@ target_link_libraries(
vllm_ascend_C
PUBLIC
${TORCH_LIBRARIES}
libtorch_npu.so
torch_npu
vllm_ascend_kernels
ascendcl
tiling_api
register
platform
ascendalog
dl
opapi
)

target_link_options(vllm_ascend_C PRIVATE "-Wl,-rpath,$ORIGIN:$ORIGIN/lib")
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# This file is a part of the vllm-ascend project.
#

FROM quay.io/ascend/cann:8.3.rc1-910b-ubuntu22.04-py3.11
FROM quay.io/ascend/cann:8.3.rc2-910b-ubuntu22.04-py3.11

ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
ARG COMPILE_CUSTOM_KERNELS=1
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile.310p
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# This file is a part of the vllm-ascend project.
#

FROM quay.io/ascend/cann:8.3.rc1-310p-ubuntu22.04-py3.11
FROM quay.io/ascend/cann:8.3.rc2-310p-ubuntu22.04-py3.11

ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
ARG COMPILE_CUSTOM_KERNELS=1
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile.310p.openEuler
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# This file is a part of the vllm-ascend project.
#

FROM quay.io/ascend/cann:8.3.rc1-310p-openeuler24.03-py3.11
FROM quay.io/ascend/cann:8.3.rc2-310p-openeuler24.03-py3.11

ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
ARG COMPILE_CUSTOM_KERNELS=1
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile.a3
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# This file is a part of the vllm-ascend project.
#

FROM quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11
FROM quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11

ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
ARG COMPILE_CUSTOM_KERNELS=1
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile.a3.openEuler
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# This file is a part of the vllm-ascend project.
#

FROM quay.io/ascend/cann:8.3.rc1-a3-openeuler24.03-py3.11
FROM quay.io/ascend/cann:8.3.rc2-a3-openeuler24.03-py3.11

ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
ARG COMPILE_CUSTOM_KERNELS=1
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile.openEuler
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# This file is a part of the vllm-ascend project.
#

FROM quay.io/ascend/cann:8.3.rc1-910b-openeuler24.03-py3.11
FROM quay.io/ascend/cann:8.3.rc2-910b-openeuler24.03-py3.11

ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
ARG COMPILE_CUSTOM_KERNELS=1
Expand Down
Loading
Loading