diff --git a/.github/labeler.yml b/.github/labeler.yml index b4ec2a424299..d73991ba029b 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -66,6 +66,13 @@ amd: - '**/*amd*' - '**/*rocm*' +# NPU specific +npu: + - changed-files: + - any-glob-to-any-file: + - '**/*npu*' + - '**/*ascend*' + # DeepSeek specific deepseek: - changed-files: diff --git a/.github/workflows/pr-test-npu.yml b/.github/workflows/pr-test-npu.yml index 4ebaec5af312..b28be34d71b8 100644 --- a/.github/workflows/pr-test-npu.yml +++ b/.github/workflows/pr-test-npu.yml @@ -1,22 +1,10 @@ -name: PR Test (Ascend NPU) +name: PR Test (NPU) on: push: branches: [ main ] - paths: - - "python/**" - - "!python/sglang/multimodal_gen/**" - - "scripts/ci/**" - - "test/**" - - ".github/workflows/pr-test-npu.yml" pull_request: branches: [ main ] - paths: - - "python/**" - - "!python/sglang/multimodal_gen/**" - - "scripts/ci/**" - - "test/**" - - ".github/workflows/pr-test-npu.yml" workflow_dispatch: concurrency: @@ -24,8 +12,39 @@ concurrency: cancel-in-progress: true jobs: - per-commit-1-ascend-npu: - if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci') + + # ==================== PR Gate ==================== # + pr-gate: + uses: ./.github/workflows/pr-gate.yml + secrets: inherit + # ================================================= # + + # ==================== Check Changes ==================== # + check-changes: + needs: [pr-gate] + runs-on: ubuntu-latest + outputs: + main_package: ${{ steps.filter.outputs.main_package }} + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Detect file changes + id: filter + uses: dorny/paths-filter@v3 + with: + filters: | + main_package: + - "python/sglang/!(multimodal_gen)/**" + - "python/*.toml" + - "scripts/ci/npu_ci_install_dependency.sh" + - "test/srt/ascend/**" + - ".github/workflows/pr-test-npu.yml" + # ======================================================= # + + per-commit-1-npu-a2: + needs: [check-changes] + if: needs.check-changes.outputs.main_package == 'true' runs-on: linux-arm64-npu-1 container: image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11 @@ -60,10 +79,11 @@ jobs: run: | export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}" cd test/srt - python3 run_suite.py --suite per-commit-1-ascend-npu + python3 run_suite.py --suite per-commit-1-npu-a2 - per-commit-2-ascend-npu: - if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci') + per-commit-2-npu-a2: + needs: [check-changes] + if: needs.check-changes.outputs.main_package == 'true' runs-on: linux-arm64-npu-2 strategy: fail-fast: true @@ -102,10 +122,11 @@ jobs: run: | export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}" cd test/srt - python3 run_suite.py --suite per-commit-2-ascend-npu --auto-partition-id ${{ matrix.part }} --auto-partition-size 3 + python3 run_suite.py --suite per-commit-2-npu-a2 --auto-partition-id ${{ matrix.part }} --auto-partition-size 3 - per-commit-4-ascend-npu: - if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci') + per-commit-4-npu-a2: + needs: [check-changes] + if: needs.check-changes.outputs.main_package == 'true' runs-on: linux-arm64-npu-4 container: image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11 @@ -140,10 +161,11 @@ jobs: run: | export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}" cd test/srt - python3 run_suite.py --suite per-commit-4-ascend-npu --timeout-per-file 3600 + python3 run_suite.py --suite per-commit-4-npu-a2 --timeout-per-file 3600 - per-commit-16-ascend-a3: - if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci') + per-commit-16-npu-a3: + needs: [check-changes] + if: needs.check-changes.outputs.main_package == 'true' runs-on: linux-aarch64-a3-16 strategy: fail-fast: true @@ -182,4 +204,4 @@ jobs: run: | export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}" cd test/srt - python3 run_suite.py --suite per-commit-16-ascend-a3 --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 + python3 run_suite.py --suite per-commit-16-npu-a3 --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 diff --git a/.github/workflows/release-docker-npu-nightly.yml b/.github/workflows/release-docker-npu-nightly.yml index a8498a5568fb..765b45336248 100644 --- a/.github/workflows/release-docker-npu-nightly.yml +++ b/.github/workflows/release-docker-npu-nightly.yml @@ -1,4 +1,4 @@ -name: Release Docker Images Nightly (Ascend NPU) +name: Release Docker Images Nightly (NPU) on: pull_request: branches: diff --git a/.github/workflows/release-docker-npu.yml b/.github/workflows/release-docker-npu.yml index 162a8b67f371..9afe3dfd6dc5 100644 --- a/.github/workflows/release-docker-npu.yml +++ b/.github/workflows/release-docker-npu.yml @@ -1,18 +1,16 @@ -name: Release Docker Images (Ascend NPU) +name: Release Docker Images (NPU) on: push: - tags: - - "*" # Trigger on all tags and filterred by pep440 later tags-ignore: - "gateway-*" # Exclude gateway/router tags - "router-*" # Exclude router tags - workflow_dispatch: pull_request: branches: - main paths: - ".github/workflows/release-docker-npu.yml" - "docker/npu.Dockerfile" + workflow_dispatch: jobs: build: diff --git a/docker/npu.Dockerfile b/docker/npu.Dockerfile index 210369de7fd7..48d0f354e496 100644 --- a/docker/npu.Dockerfile +++ b/docker/npu.Dockerfile @@ -45,7 +45,6 @@ RUN apt-get update -y && apt upgrade -y && apt-get install -y \ libssl-dev \ pkg-config \ ca-certificates \ - protobuf-compiler \ && rm -rf /var/cache/apt/* \ && rm -rf /var/lib/apt/lists/* \ && update-ca-certificates \ @@ -54,18 +53,11 @@ RUN apt-get update -y && apt upgrade -y && apt-get install -y \ ENV LANG=en_US.UTF-8 ENV LANGUAGE=en_US:en ENV LC_ALL=en_US.UTF-8 -ENV PATH="/root/.cargo/bin:${PATH}" # Install dependencies # TODO: install from pypi released memfabric RUN pip install $MEMFABRIC_URL --no-cache-dir -RUN pip install setuptools-rust wheel build --no-cache-dir - -# install rustup from rustup.rs -RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y \ - && rustc --version && cargo --version && protoc --version - # Install vLLM RUN git clone --depth 1 https://github.com/vllm-project/vllm.git --branch $VLLM_TAG && \ (cd vllm && VLLM_TARGET_DEVICE="empty" pip install -v . --no-cache-dir) && rm -rf vllm @@ -79,12 +71,14 @@ RUN pip install torch==$PYTORCH_VERSION torchvision==$TORCHVISION_VERSION --inde # Install SGLang RUN git clone https://github.com/sgl-project/sglang --branch $SGLANG_TAG && \ (cd sglang/python && rm -rf pyproject.toml && mv pyproject_other.toml pyproject.toml && pip install -v .[srt_npu] --no-cache-dir) && \ - (cd sglang/sgl-router && python -m build && pip install --force-reinstall dist/*.whl) && \ rm -rf sglang +# Install SGLang Model Gateway +RUN pip install sglang-router --no-cache-dir + # Install Deep-ep # pin wheel to 0.45.1 ref: https://github.com/pypa/wheel/issues/662 -RUN pip install wheel==0.45.1 && git clone --branch $SGLANG_KERNEL_NPU_TAG https://github.com/sgl-project/sgl-kernel-npu.git \ +RUN pip install wheel==0.45.1 && git clone --branch $SGLANG_KERNEL_NPU_TAG https://github.com/sgl-project/sgl-kernel-npu.git \ && export LD_LIBRARY_PATH=${ASCEND_CANN_PATH}/latest/runtime/lib64/stub:$LD_LIBRARY_PATH && \ source ${ASCEND_CANN_PATH}/set_env.sh && \ cd sgl-kernel-npu && \ diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index 56336f60b6d7..9a00e4b7b251 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -514,25 +514,26 @@ } # Add Ascend NPU tests +# TODO: Set accurate estimate time # NOTE: please sort the test cases alphabetically by the test file name suite_ascend = { - "per-commit-1-ascend-npu": [ + "per-commit-1-npu-a2": [ TestFile("ascend/test_ascend_graph_tp1_bf16.py", 400), - TestFile("ascend/test_ascend_tp1_bf16.py", 400), TestFile("ascend/test_ascend_hicache_mha.py", 400), TestFile("ascend/test_ascend_sampling_backend.py", 400), + TestFile("ascend/test_ascend_tp1_bf16.py", 400), ], - "per-commit-2-ascend-npu": [ + "per-commit-2-npu-a2": [ TestFile("ascend/test_ascend_graph_tp2_bf16.py", 400), TestFile("ascend/test_ascend_mla_fia_w8a8int8.py", 400), TestFile("ascend/test_ascend_tp2_bf16.py", 400), TestFile("ascend/test_ascend_tp2_fia_bf16.py", 400), ], - "per-commit-4-ascend-npu": [ + "per-commit-4-npu-a2": [ TestFile("ascend/test_ascend_mla_w8a8int8.py", 400), TestFile("ascend/test_ascend_tp4_bf16.py", 400), ], - "per-commit-16-ascend-a3": [ + "per-commit-16-npu-a3": [ TestFile("ascend/test_ascend_deepep.py", 400), TestFile("ascend/test_ascend_deepseek_mtp.py", 400), ],