Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,13 @@ amd:
- '**/*amd*'
- '**/*rocm*'

# NPU specific
# Applies the `npu` label when at least one changed file has a name
# containing "npu" or "ascend", at any directory depth.
npu:
- changed-files:
- any-glob-to-any-file:
- '**/*npu*'
- '**/*ascend*'

# DeepSeek specific
deepseek:
- changed-files:
Expand Down
72 changes: 47 additions & 25 deletions .github/workflows/pr-test-npu.yml
Original file line number Diff line number Diff line change
@@ -1,31 +1,50 @@
name: PR Test (Ascend NPU)
name: PR Test (NPU)

on:
push:
branches: [ main ]
paths:
- "python/**"
- "!python/sglang/multimodal_gen/**"
- "scripts/ci/**"
- "test/**"
- ".github/workflows/pr-test-npu.yml"
pull_request:
branches: [ main ]
paths:
- "python/**"
- "!python/sglang/multimodal_gen/**"
- "scripts/ci/**"
- "test/**"
- ".github/workflows/pr-test-npu.yml"
workflow_dispatch:

concurrency:
group: pr-test-npu-${{ github.ref }}
cancel-in-progress: true

jobs:
per-commit-1-ascend-npu:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')

# ==================== PR Gate ==================== #
# Delegates to the repository-local reusable workflow pr-gate.yml.
# `secrets: inherit` passes the calling workflow's secrets through to it.
pr-gate:
uses: ./.github/workflows/pr-gate.yml
secrets: inherit
# ================================================= #

# ==================== Check Changes ==================== #
# Decides whether the NPU test jobs should run. It publishes the
# `main_package` output, which the per-commit-* jobs in this workflow
# compare against 'true' in their `if:` conditions.
check-changes:
# Starts only after the pr-gate job has completed successfully.
needs: [pr-gate]
runs-on: ubuntu-latest
outputs:
# Re-export the result of the `filter` step below as a job-level output.
main_package: ${{ steps.filter.outputs.main_package }}
steps:
- name: Checkout code
uses: actions/checkout@v4

# The `main_package` filter is reported as matched when a changed file
# matches any of the patterns listed under it.
# NOTE(review): `!(multimodal_gen)` looks like an extglob negation meant to
# exclude python/sglang/multimodal_gen - confirm against paths-filter's matcher.
- name: Detect file changes
id: filter
uses: dorny/paths-filter@v3
with:
filters: |
main_package:
- "python/sglang/!(multimodal_gen)/**"
- "python/*.toml"
- "scripts/ci/npu_ci_install_dependency.sh"
- "test/srt/ascend/**"
- ".github/workflows/pr-test-npu.yml"
# ======================================================= #

per-commit-1-npu-a2:
needs: [check-changes]
if: needs.check-changes.outputs.main_package == 'true'
runs-on: linux-arm64-npu-1
container:
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
Expand Down Expand Up @@ -60,10 +79,11 @@ jobs:
run: |
export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}"
cd test/srt
python3 run_suite.py --suite per-commit-1-ascend-npu
python3 run_suite.py --suite per-commit-1-npu-a2

per-commit-2-ascend-npu:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
per-commit-2-npu-a2:
needs: [check-changes]
if: needs.check-changes.outputs.main_package == 'true'
runs-on: linux-arm64-npu-2
strategy:
fail-fast: true
Expand Down Expand Up @@ -102,10 +122,11 @@ jobs:
run: |
export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}"
cd test/srt
python3 run_suite.py --suite per-commit-2-ascend-npu --auto-partition-id ${{ matrix.part }} --auto-partition-size 3
python3 run_suite.py --suite per-commit-2-npu-a2 --auto-partition-id ${{ matrix.part }} --auto-partition-size 3

per-commit-4-ascend-npu:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
per-commit-4-npu-a2:
needs: [check-changes]
if: needs.check-changes.outputs.main_package == 'true'
runs-on: linux-arm64-npu-4
container:
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
Expand Down Expand Up @@ -140,10 +161,11 @@ jobs:
run: |
export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}"
cd test/srt
python3 run_suite.py --suite per-commit-4-ascend-npu --timeout-per-file 3600
python3 run_suite.py --suite per-commit-4-npu-a2 --timeout-per-file 3600

per-commit-16-ascend-a3:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
per-commit-16-npu-a3:
needs: [check-changes]
if: needs.check-changes.outputs.main_package == 'true'
runs-on: linux-aarch64-a3-16
strategy:
fail-fast: true
Expand Down Expand Up @@ -182,4 +204,4 @@ jobs:
run: |
export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}"
cd test/srt
python3 run_suite.py --suite per-commit-16-ascend-a3 --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
python3 run_suite.py --suite per-commit-16-npu-a3 --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
2 changes: 1 addition & 1 deletion .github/workflows/release-docker-npu-nightly.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Release Docker Images Nightly (Ascend NPU)
name: Release Docker Images Nightly (NPU)
on:
pull_request:
branches:
Expand Down
6 changes: 2 additions & 4 deletions .github/workflows/release-docker-npu.yml
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
name: Release Docker Images (Ascend NPU)
name: Release Docker Images (NPU)
on:
push:
tags:
- "*" # Trigger on all tags; they are filtered by PEP 440 later
tags-ignore:
- "gateway-*" # Exclude gateway/router tags
- "router-*" # Exclude router tags
workflow_dispatch:
pull_request:
branches:
- main
paths:
- ".github/workflows/release-docker-npu.yml"
- "docker/npu.Dockerfile"
workflow_dispatch:

jobs:
build:
Expand Down
14 changes: 4 additions & 10 deletions docker/npu.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ RUN apt-get update -y && apt upgrade -y && apt-get install -y \
libssl-dev \
pkg-config \
ca-certificates \
protobuf-compiler \
&& rm -rf /var/cache/apt/* \
&& rm -rf /var/lib/apt/lists/* \
&& update-ca-certificates \
Expand All @@ -54,18 +53,11 @@ RUN apt-get update -y && apt upgrade -y && apt-get install -y \
ENV LANG=en_US.UTF-8
ENV LANGUAGE=en_US:en
ENV LC_ALL=en_US.UTF-8
ENV PATH="/root/.cargo/bin:${PATH}"

# Install dependencies
# TODO: install from pypi released memfabric
RUN pip install $MEMFABRIC_URL --no-cache-dir

RUN pip install setuptools-rust wheel build --no-cache-dir

# install rustup from rustup.rs
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y \
&& rustc --version && cargo --version && protoc --version

# Install vLLM
RUN git clone --depth 1 https://github.com/vllm-project/vllm.git --branch $VLLM_TAG && \
(cd vllm && VLLM_TARGET_DEVICE="empty" pip install -v . --no-cache-dir) && rm -rf vllm
Expand All @@ -79,12 +71,14 @@ RUN pip install torch==$PYTORCH_VERSION torchvision==$TORCHVISION_VERSION --inde
# Install SGLang
RUN git clone https://github.com/sgl-project/sglang --branch $SGLANG_TAG && \
(cd sglang/python && rm -rf pyproject.toml && mv pyproject_other.toml pyproject.toml && pip install -v .[srt_npu] --no-cache-dir) && \
(cd sglang/sgl-router && python -m build && pip install --force-reinstall dist/*.whl) && \
rm -rf sglang

# Install SGLang Model Gateway
RUN pip install sglang-router --no-cache-dir

# Install Deep-ep
# pin wheel to 0.45.1 ref: https://github.com/pypa/wheel/issues/662
RUN pip install wheel==0.45.1 && git clone --branch $SGLANG_KERNEL_NPU_TAG https://github.com/sgl-project/sgl-kernel-npu.git \
RUN pip install wheel==0.45.1 && git clone --branch $SGLANG_KERNEL_NPU_TAG https://github.com/sgl-project/sgl-kernel-npu.git \
&& export LD_LIBRARY_PATH=${ASCEND_CANN_PATH}/latest/runtime/lib64/stub:$LD_LIBRARY_PATH && \
source ${ASCEND_CANN_PATH}/set_env.sh && \
cd sgl-kernel-npu && \
Expand Down
11 changes: 6 additions & 5 deletions test/srt/run_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -514,25 +514,26 @@
}

# Add Ascend NPU tests
# TODO: Set accurate estimated times
# NOTE: please sort the test cases alphabetically by the test file name
suite_ascend = {
"per-commit-1-ascend-npu": [
"per-commit-1-npu-a2": [
TestFile("ascend/test_ascend_graph_tp1_bf16.py", 400),
TestFile("ascend/test_ascend_tp1_bf16.py", 400),
TestFile("ascend/test_ascend_hicache_mha.py", 400),
TestFile("ascend/test_ascend_sampling_backend.py", 400),
TestFile("ascend/test_ascend_tp1_bf16.py", 400),
],
"per-commit-2-ascend-npu": [
"per-commit-2-npu-a2": [
TestFile("ascend/test_ascend_graph_tp2_bf16.py", 400),
TestFile("ascend/test_ascend_mla_fia_w8a8int8.py", 400),
TestFile("ascend/test_ascend_tp2_bf16.py", 400),
TestFile("ascend/test_ascend_tp2_fia_bf16.py", 400),
],
"per-commit-4-ascend-npu": [
"per-commit-4-npu-a2": [
TestFile("ascend/test_ascend_mla_w8a8int8.py", 400),
TestFile("ascend/test_ascend_tp4_bf16.py", 400),
],
"per-commit-16-ascend-a3": [
"per-commit-16-npu-a3": [
TestFile("ascend/test_ascend_deepep.py", 400),
TestFile("ascend/test_ascend_deepseek_mtp.py", 400),
],
Expand Down
Loading