diff --git a/.github/workflows/nightly-test-npu.yml b/.github/workflows/nightly-test-npu.yml index 68689265d691..cf72e88eaace 100644 --- a/.github/workflows/nightly-test-npu.yml +++ b/.github/workflows/nightly-test-npu.yml @@ -23,7 +23,7 @@ jobs: matrix: part: [0, 1] container: - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-a3-ubuntu22.04-py3.11 + image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11 steps: - name: Checkout code uses: actions/checkout@v4 @@ -69,7 +69,7 @@ jobs: matrix: part: [0] container: - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-a3-ubuntu22.04-py3.11 + image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11 steps: - name: Checkout code uses: actions/checkout@v4 @@ -115,7 +115,7 @@ jobs: matrix: part: [0] container: - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-a3-ubuntu22.04-py3.11 + image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11 steps: - name: Checkout code uses: actions/checkout@v4 diff --git a/.github/workflows/pr-test-npu.yml b/.github/workflows/pr-test-npu.yml index af19e7549531..c7c48e2ef83d 100644 --- a/.github/workflows/pr-test-npu.yml +++ b/.github/workflows/pr-test-npu.yml @@ -45,7 +45,7 @@ jobs: if: needs.check-changes.outputs.main_package == 'true' runs-on: linux-arm64-npu-1 container: - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11 + image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11 steps: - name: Checkout code uses: actions/checkout@v4 @@ -88,7 +88,7 @@ jobs: matrix: part: [0, 1, 2] container: - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11 + image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11 steps: - name: Checkout code uses: actions/checkout@v4 @@ -127,7 +127,7 @@ jobs: if: needs.check-changes.outputs.main_package == 'true' runs-on: linux-arm64-npu-4 container: - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11 + image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11 steps: - name: Checkout code uses: actions/checkout@v4 @@ -170,7 +170,7 @@ jobs: matrix: part: [0, 1] container: - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-a3-ubuntu22.04-py3.11 + image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11 steps: - name: Checkout code uses: actions/checkout@v4 diff --git a/.github/workflows/release-docker-npu-nightly.yml b/.github/workflows/release-docker-npu-nightly.yml index 1873c2c361e6..1778038ac0eb 100644 --- a/.github/workflows/release-docker-npu-nightly.yml +++ b/.github/workflows/release-docker-npu-nightly.yml @@ -19,7 +19,7 @@ jobs: runs-on: ubuntu-22.04-arm strategy: matrix: - cann_version: ["8.3.rc1"] + cann_version: ["8.3.rc2"] device_type: ["910b", "a3"] steps: - name: Checkout repository @@ -73,6 +73,6 @@ jobs: push: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }} provenance: false build-args: | - SGLANG_KERNEL_NPU_TAG=20251128 + SGLANG_KERNEL_NPU_TAG=20251206 CANN_VERSION=${{ matrix.cann_version }} DEVICE_TYPE=${{ matrix.device_type }} diff --git a/.github/workflows/release-docker-npu.yml b/.github/workflows/release-docker-npu.yml index dd054c9c09a5..a366c30fe02f 100644 --- a/.github/workflows/release-docker-npu.yml +++ b/.github/workflows/release-docker-npu.yml @@ -17,7 +17,7 @@ jobs: runs-on: ubuntu-22.04-arm strategy: matrix: - cann_version: ["8.3.rc1"] + cann_version: ["8.3.rc2"] device_type: ["910b", "a3"] steps: - name: Checkout repository @@ -70,6 +70,6 @@ jobs: push: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }} provenance: false build-args: | - SGLANG_KERNEL_NPU_TAG=20251128 + SGLANG_KERNEL_NPU_TAG=20251206 CANN_VERSION=${{ matrix.cann_version }} DEVICE_TYPE=${{ matrix.device_type }} diff --git a/docker/npu.Dockerfile b/docker/npu.Dockerfile index 54261e708a29..d028f4170723 100644 --- a/docker/npu.Dockerfile +++ b/docker/npu.Dockerfile @@ -1,4 +1,4 @@ -ARG CANN_VERSION=8.3.rc1 +ARG CANN_VERSION=8.3.rc2 ARG DEVICE_TYPE=a3 ARG OS=ubuntu22.04 ARG PYTHON_VERSION=py3.11 diff --git a/docs/platforms/ascend_npu.md b/docs/platforms/ascend_npu.md index a1357a623e65..e77be1d813ca 100644 --- a/docs/platforms/ascend_npu.md +++ b/docs/platforms/ascend_npu.md @@ -18,7 +18,7 @@ conda activate sglang_npu #### CANN -Prior to start work with SGLang on Ascend you need to install CANN Toolkit, Kernels operator package and NNAL version 8.3.RC1 or higher, check the [installation guide](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/83RC1/softwareinst/instg/instg_0008.html?Mode=PmIns&InstallType=local&OS=openEuler&Software=cannToolKit) +Prior to start work with SGLang on Ascend you need to install CANN Toolkit, Kernels operator package and NNAL version 8.3.RC2 or higher, check the [installation guide](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/83RC1/softwareinst/instg/instg_0008.html?Mode=PmIns&InstallType=local&OS=openEuler&Software=cannToolKit) #### MemFabric Adaptor diff --git a/docs/platforms/ascend_npu_deepseek_example.md b/docs/platforms/ascend_npu_deepseek_example.md index 90e2b973807a..acb864ef568e 100644 --- a/docs/platforms/ascend_npu_deepseek_example.md +++ b/docs/platforms/ascend_npu_deepseek_example.md @@ -30,7 +30,7 @@ python3 -m sglang.launch_server \ --trust-remote-code \ --attention-backend ascend \ --device npu \ - --quantization w8a8_int8 \ + --quantization modelslim \ --watchdog-timeout 9000 \ --host 127.0.0.1 \ --port 6688 \ @@ -89,7 +89,7 @@ python -m sglang.launch_server \ --mem-fraction-static 0.6 \ --attention-backend ascend \ --device npu \ - --quantization w8a8_int8 \ + --quantization modelslim \ --disaggregation-transfer-backend ascend \ --max-running-requests 8 \ --context-length 8192 \ @@ -145,7 +145,7 @@ python -m sglang.launch_server \ --max-running-requests 352 \ --attention-backend ascend \ --device npu \ - --quantization w8a8_int8 \ + --quantization modelslim \ --moe-a2a-backend deepep \ --enable-dp-attention \ --deepep-mode low_latency \ @@ -214,7 +214,7 @@ do --mem-fraction-static 0.81 \ --attention-backend ascend \ --device npu \ - --quantization w8a8_int8 \ + --quantization modelslim \ --disaggregation-transfer-backend ascend \ --max-running-requests 8 \ --context-length 8192 \ @@ -275,7 +275,7 @@ do --max-running-requests 832 \ --attention-backend ascend \ --device npu \ - --quantization w8a8_int8 \ + --quantization modelslim \ --moe-a2a-backend deepep \ --enable-dp-attention \ --deepep-mode low_latency \ diff --git a/docs/references/mindspore_models.md b/docs/references/mindspore_models.md index 80dd3b7f0e95..8ab5b81ce77e 100644 --- a/docs/references/mindspore_models.md +++ b/docs/references/mindspore_models.md @@ -7,7 +7,7 @@ MindSpore is a high-performance AI framework optimized for Ascend NPUs. This doc ## Requirements MindSpore currently only supports Ascend NPU devices. Users need to first install Ascend CANN software packages. -The CANN software packages can be downloaded from the [Ascend Official Website](https://www.hiascend.com). The recommended version is 8.3.RC1. +The CANN software packages can be downloaded from the [Ascend Official Website](https://www.hiascend.com). The recommended version is 8.3.RC2. ## Supported Models diff --git a/scripts/ci/npu_ci_install_dependency.sh b/scripts/ci/npu_ci_install_dependency.sh index 7cfdaaee6749..bbc27a6df1e7 100755 --- a/scripts/ci/npu_ci_install_dependency.sh +++ b/scripts/ci/npu_ci_install_dependency.sh @@ -49,7 +49,7 @@ wget -O "${BISHENG_NAME}" "${BISHENG_URL}" && chmod a+x "${BISHENG_NAME}" && "./ ### Install sgl-kernel-npu -SGL_KERNEL_NPU_TAG="20251128" +SGL_KERNEL_NPU_TAG="20251206" git clone --depth 1 https://github.com/sgl-project/sgl-kernel-npu.git --branch ${SGL_KERNEL_NPU_TAG} (cd sgl-kernel-npu && bash ./build.sh && ${PIP_INSTALL} output/deep_ep*.whl output/sgl_kernel_npu*.whl && cd "$(python3 -m pip show deep-ep | grep -E '^Location:' | awk '{print $2}')" && ln -s deep_ep/deep_ep_cpp*.so)