Merged
396 commits
da0c026
Tiny assert EPLB is used together with expert parallel (#8381)
fzyzcjy Jul 26, 2025
b7094a5
model: support intern-s1 (#8350)
RunningLeon Jul 26, 2025
5c705b1
Add perf tests for LoRA (#8314)
lifuhuang Jul 26, 2025
7615463
Remove slot usage in code to be backward-compatible with python 3.9 (…
lifuhuang Jul 27, 2025
62a6b7c
Add docker release flow for gb200 (#8394)
kyleliang-nv Jul 27, 2025
528bd1e
HiCache, check before terminate prefetching (#8372)
xiezhq-hermann Jul 27, 2025
426b749
Add nvfp4 scaled mm benchmark. (#8401)
HydraQYH Jul 27, 2025
b602f42
Urgent Fix: intern-s1 chat-template matching (#8403)
JustinTong0323 Jul 27, 2025
ed0fdbf
Tool to dump and compare internal activation tensors (#7976)
fzyzcjy Jul 27, 2025
62222bd
Minor tool for comparison of benchmark results (#7974)
fzyzcjy Jul 27, 2025
e34cf6a
Fix bench script making input data on L2 cache (#7739)
fzyzcjy Jul 27, 2025
85486b6
[NVIDIA] Add Flashinfer MoE blockscale fp8 backend (#8036)
kaixih Jul 27, 2025
91e3d15
Update Cutlass in sgl-kernel to v4.1 (#8392)
Fridge003 Jul 27, 2025
0bcc195
fix: minor fix TransportProxyTensor under tp (#8382)
mickqian Jul 27, 2025
2ab9702
[router] add different policies for p node and d node (#8395)
slin1237 Jul 27, 2025
2a1936d
Add A800 fused MoE kernel tuning configs for Qwen3-Coder-480B-A35B-In…
lambert0312 Jul 27, 2025
36d6f0b
fix: fix the missing metrics on non-rank0 nodes (#7720)
acelyc111 Jul 27, 2025
bf0f448
[2/N] MoE Refactor: Unify weight loader and quant methods (#8397)
ch-wan Jul 27, 2025
5c9c275
Use FlashInfer FP4 gemm. (#8241)
elfiegg Jul 27, 2025
44d600c
Support precomputed_embeddings for Llama 4 (#8156)
AlienKevin Jul 27, 2025
4d921f2
[hotfix] fix merge conflicts in FlashInferEPMoE (#8405)
ch-wan Jul 27, 2025
bf3352c
chore: update CODEOWNERS (#8407)
zhyncs Jul 27, 2025
10ee895
chore: upgrade flashinfer v0.2.9rc2 (#8406)
zhyncs Jul 27, 2025
b3eac16
Support triton kernels v3.4.0 for fused_moe (#8258)
yuan-luo Jul 27, 2025
22e00ee
[Bugfix] Prevent PD server crash from invalid grammar (#8062)
ShangmingCai Jul 27, 2025
95217a9
Change to use native arm runner (#8414)
kyleliang-nv Jul 27, 2025
df90645
Support overlapped lora updates (#8213)
lifuhuang Jul 27, 2025
b58c3c2
Support ue8m0 for triton quant kernel (#7603)
fzyzcjy Jul 27, 2025
e983d66
Fix: Improve test_openai_function_calling unit test and fix reasoning…
byjiang1996 Jul 27, 2025
b47eda3
bugfix: Fix multiple finish_reason chunks and tool_calls finish reaso…
CatherineSue Jul 27, 2025
58dd95f
Fix test_openai_server (#8419)
CatherineSue Jul 27, 2025
bb81dae
Fix docker buildx push error (#8425)
kyleliang-nv Jul 28, 2025
dd487e5
bugfix: Fix XGrammar backend to use model's EOS tokens for constraine…
CatherineSue Jul 28, 2025
fe6a445
[router] improve router logs and request id header (#8415)
slin1237 Jul 28, 2025
2810338
[feat] Support different attention backends for prefill and decode (…
Qiaolin-Yu Jul 28, 2025
4ad9737
chore: bump transformer to 4.54.0 (#8416)
hebiao064 Jul 28, 2025
2fd5c70
[PD] Fix abort_request for PD disaggregation (#8352)
ShangmingCai Jul 28, 2025
6d6a8bc
GLM-4.5 Model Support (#8224)
zRzRzRzRzRzRzR Jul 28, 2025
5922c0c
Remove zstd compression for building Dockerfile.gb200 (#8442)
kyleliang-nv Jul 28, 2025
484d0e0
doc: add bench_one_batch_server in the benchmark doc (#8441)
Qiaolin-Yu Jul 28, 2025
581e7dc
GLM-4.5 Model Support Follow-up (#8445)
byjiang1996 Jul 28, 2025
25f73c6
fix GLM4_MOE launch with compressed_tensor quant model (#8456)
zminglei Jul 28, 2025
fb4ce17
Fix per_token_group_quant_8bit when hidden_dim // group_size is not d…
strgrb Jul 28, 2025
2262369
Revert "[kernel] opt moe align block kernel by block/warp scan algori…
BBuf Jul 28, 2025
45bc170
chore: bump v0.4.9.post5 (#8458)
zhyncs Jul 28, 2025
a9dd3ec
fix: reorder topk experts to ensure shared expert replaces minimal sco…
erictanjn Jul 28, 2025
b582159
Update PR template (#8465)
ispobock Jul 28, 2025
747dd45
feat: throttle requests at scheduler based on --max_queued_requests (…
harrisonlimh Jul 28, 2025
ccfe52a
fix: update dep (#8467)
zhyncs Jul 28, 2025
134fa43
[NVIDIA] Change to use `num_local_experts` (#8453)
kaixih Jul 28, 2025
c8f549d
Fix parsing ChatCompletionMessage (#7273)
Onyad Jul 28, 2025
9c138a0
[3/N] MoE Refactor: Simplify DeepEP Output (#8421)
ch-wan Jul 28, 2025
1466c1b
feat: support glm4 tuning (#8473)
zhyncs Jul 28, 2025
74e7e45
Fix DEEPEP BF16 compatibility for Deepseek Style model like GLM 4.5 (…
hebiao064 Jul 28, 2025
bd51694
Update codeowner (#8476)
merrymercy Jul 28, 2025
3a04aa4
chore: add glm4 fp8 tp8 config (#8478)
zhyncs Jul 28, 2025
8240a6b
chore: add glm 4.5 fp8 tp4 config (#8480)
zhyncs Jul 28, 2025
7c96971
[CI]Add genai-bench Performance Validation for PD Router (#8477)
key4ng Jul 28, 2025
001bffc
Update CODEOWNERS (#8485)
merrymercy Jul 29, 2025
69712e6
Rename the last step in pr-test.yml as pr-test-finish (#8486)
merrymercy Jul 29, 2025
7df2c0c
Reduce memory usage for fp4 moe (#8413)
fzyzcjy Jul 29, 2025
59d0bf0
Tiny add warnings for DeepEP when it is suboptimal (#8426)
fzyzcjy Jul 29, 2025
0ce84c8
Support colocating requests (#7973)
fzyzcjy Jul 29, 2025
fb16fba
Fix incorrect KV cache allocation for MTP models. (#8482)
lifuhuang Jul 29, 2025
2e1d2d7
Add PVC and update resource limits in k8s config (#8489)
haitwang-cloud Jul 29, 2025
6478831
chore: bump v0.4.9.post6 (#8517)
zhyncs Jul 29, 2025
263c923
Always trigger pr-test (#8527)
merrymercy Jul 29, 2025
8136706
Update README.md (#8528)
merrymercy Jul 29, 2025
7a4309c
[sgl-kernel performance] fix fp8 quant kernels dispatch __nv_fp8_e4m3 …
BBuf Jul 29, 2025
4d16c88
Update cutlass_moe.py (#8535)
elfiegg Jul 29, 2025
5973675
Fix moe align kernel test (#8531)
ispobock Jul 29, 2025
a4c3b12
Split the scheduler into multiple mixin classes to reduce the file si…
merrymercy Jul 29, 2025
c0fd77e
bring back kimi vl ci (#8537)
hebiao064 Jul 29, 2025
1992ef9
fix: temporarily disable cuda-ipc for mm data tensor (#8431)
mickqian Jul 29, 2025
9effeb5
Support EPLB in FusedMoE (#8448)
ch-wan Jul 29, 2025
a85ebf5
feat(hicache): support file backend reading directory config from env…
hzh0425 Jul 30, 2025
2fbb754
feature(pd-hicache): Prefill instances support reusing the RemoteStor…
hzh0425 Jul 30, 2025
a9fd803
[router] allow longer time out for router e2e (#8560)
slin1237 Jul 30, 2025
e3f08c7
Update cutlass_moe.py (#8545)
elfiegg Jul 30, 2025
55ecdc0
Update CODEOWNERS (#8562)
ShangmingCai Jul 30, 2025
a730ce8
[feature] [sgl-router] Add a dp-aware routing strategy (#6869)
oldsharp Jul 30, 2025
3bdcdd1
[Hot-Fix] moe_aligned_block_size CI failed in AMD (#8461)
yuan-luo Jul 30, 2025
ec5f944
[Model] Add support for Arcee Foundational Model (#8154)
adarshxs Jul 30, 2025
a79a5d7
Revert "Fix the input tools format and history tool_calls in OpenAI A…
CatherineSue Jul 30, 2025
2998033
Add hf3fs support for hicache storage (based on #7704) (#7280)
pansicheng Jul 31, 2025
66a398f
[router] migrate router from actix to axum (#8479)
slin1237 Jul 31, 2025
9b9e825
[Fix]Fix index oob in get_group_gemm_starts kernel. (#8564)
HydraQYH Jul 31, 2025
67e53b1
Bump transformers to 4.54.1 to fix Gemma cache issue. (#8541)
lifuhuang Jul 31, 2025
659bfd1
Add GKE's default CUDA runtime lib location to PATH and LD_LIBRARY_PA…
pyc96 Jul 31, 2025
59aab76
Bug: Fix google gemma3n-mm audio input not working bug (#8365)
byjiang1996 Jul 31, 2025
a5f5ab4
update sgl-kernel for EP: kernel part (#8514)
ch-wan Jul 31, 2025
43118f5
chore: bump sgl-kernel v0.2.8 (#8599)
zhyncs Jul 31, 2025
5963e50
[bugfix] Fix 2 minor bugs in the hicache storage layer (#8404)
yapple Jul 31, 2025
26c8a31
fix incorrect increase of hit count (#8533)
huangtingwei9988 Jul 31, 2025
d904959
Support l3 cache (mooncake store) for hiradix cache (#7211)
huangtingwei9988 Jul 31, 2025
e179e0b
update sgl-kernel for EP: python part (#8550)
ch-wan Jul 31, 2025
e7dc163
add SVG logo (#8603)
hnyls2002 Jul 31, 2025
32fa1e9
[4/N] MoE Refactor: Unified Triton Kernel for FusedMoE and EPMoE (#8515)
ch-wan Jul 31, 2025
09f1a24
fix: fork should not run pypi router (#8604)
yihong0618 Jul 31, 2025
51c3816
model: support Step3V (#8583)
CatherineSue Jul 31, 2025
7a1f7fc
[Feature] Hybrid EP and TP (#8590)
ch-wan Jul 31, 2025
0232886
chore: bump v0.4.10 (#8608)
zhyncs Jul 31, 2025
016fd25
[PD] Use batch transfer for rdma transport and add notes for mnnvl us…
ShangmingCai Jul 31, 2025
5d15fb8
[bugfix] QWen-1M context support[2/3] using current cuda stream in th…
sighingnow Jul 31, 2025
3c307dc
Fix hf3fs_fuse import error (#8623)
ispobock Jul 31, 2025
8fbcfd0
Update step3v default config (#8626)
ispobock Jul 31, 2025
ae80777
[ci] fix genai-bench execution cmd (#8629)
slin1237 Jul 31, 2025
aee0ef5
[router] update router pypi version (#8628)
slin1237 Jul 31, 2025
4acf690
[Optimization][Perf] Disable the GC during CUDA graph capture to spee…
b8zhong Jul 31, 2025
061c895
Fix typos in py_test/test_launch_server.py (#6227)
windsonsea Jul 31, 2025
743638b
misc: Remove debug print to logger.info (#8633)
CatherineSue Jul 31, 2025
2cd2e27
SGLang HiCache NIXL Connector (#8488)
vvenkates27 Jul 31, 2025
5c14515
[bug] remove pdlb from minilb since it's no longer available (#8634)
slin1237 Jul 31, 2025
b7170cc
[bugfix] Fix flashinfer cutlass EP moe after MoE refactor (#8630)
trevor-m Jul 31, 2025
3dde861
Conditionally import HiCacheHF3FS (#8598)
pansicheng Jul 31, 2025
4b04998
TRTLLM Gen MLA Decode Kernel Integration (same as #7938) (#8632)
farazkh80 Jul 31, 2025
4a6e7a6
Fix nan value generated after custom all reduce (#8532)
kkHuang-amd Jul 31, 2025
0ad098b
Revert "Fix nan value generated after custom all reduce (#8532)" (#8642)
zhyncs Aug 1, 2025
0491343
Feature/modelscope model download (#8083)
yrk111222 Aug 1, 2025
fe5086f
chore: speedup NPU CI by cache (#8270)
pkking Aug 1, 2025
99795d6
[Bugfix] fix w8a8_int8 load issue (#8308)
iforgetmyname Aug 1, 2025
2886e23
[bugfix] fix router python parser for pd urls (#8644)
slin1237 Aug 1, 2025
f6f46f4
[router] add basic usage doc (#8640)
slin1237 Aug 1, 2025
39decec
[router] upgrade router version to 0.1.8 (#8645)
slin1237 Aug 1, 2025
aa4c66b
[NVIDIA] Enable Flashinfer MoE blockscale fp8 backend for TP MoE (#8450)
kaixih Aug 1, 2025
9305ea6
HiCache, fixing hash value indexing (#8636)
xiezhq-hermann Aug 1, 2025
dd7ca00
Interface change for kvcache io to support page first layout (#8318)
xiezhq-hermann Aug 1, 2025
e7e5a30
Update batch size limitation of dsv3_router_gemm kernel to 16 (#8051)
Fridge003 Aug 1, 2025
33f0de3
chore: bump v0.4.10.post1 (#8652)
ispobock Aug 1, 2025
20b5563
Add hf3fs_utils.cpp to package-data (#8653)
pansicheng Aug 1, 2025
7e831ef
Fix chat template handling for OpenAI serving (#8635)
JustinTong0323 Aug 1, 2025
c8d3a40
Bug: apply final_hidden_states*=self.routed_scaling_factor at MoE lay…
byjiang1996 Aug 1, 2025
6c88f6c
[5/N] MoE Refactor: Update MoE parallelism arguments (#8658)
ch-wan Aug 1, 2025
46e9d1c
Increase tolerance to address CI failures (#8643)
lifuhuang Aug 1, 2025
6bdd278
[Kimi K2] dsv3_router_gemm supports NUM_EXPERTS == 384 (#8013)
panpan0000 Aug 1, 2025
533cb5b
[DOC]Update sgl-kernel README (#8665)
Hongbosherlock Aug 1, 2025
db7343c
fix per token cuda kernel hidden dim cannot divide by 16 (#8543)
hebiao064 Aug 1, 2025
b17c5b0
fix arg typo for --disaggregation-transfer-backend (#8664)
ZacWang Aug 1, 2025
2d401bd
[fix] fix pd disagg error of vlms (#8094)
ccw1996 Aug 1, 2025
2ae95d1
Disable tp for shared experts under expert parallelism for GLM4.5 mod…
zminglei Aug 1, 2025
6a7528e
[bugfix] Fix page size for create_flashmla_kv_indices_triton() for cu…
trevor-m Aug 1, 2025
ab9b893
[bug] limit bootstrap room to [0, 2^63 - 1] (#8684)
slin1237 Aug 1, 2025
07e46ec
Update CODEOWNERS (#8686)
merrymercy Aug 1, 2025
e252192
Fix deepgemm masked grouped gemm jit compile (#8679)
ispobock Aug 1, 2025
1fe691a
Fix FP8 block quantization when N or K is not multiples of 128 (#8648)
yanbing-j Aug 1, 2025
d1c4d51
bugfix(hicache): Fix 'MooncakeStore' not defined error. (#8668)
hzh0425 Aug 1, 2025
5deab12
upgrade xgrammar 0.1.22 (#8522)
Swipe4057 Aug 1, 2025
b89d37c
[bugfix] Add 'disaggregation_mode' parameter to warmup function when …
lbh2001 Aug 1, 2025
82e6c3a
Add support for NCCL symmetric memory for TP allreduces (#8238)
nvcastet Aug 1, 2025
f642524
[1/2] sgl-kernel: Fuse routed scaling factor into select_experts (#8364)
trevor-m Aug 2, 2025
b27b119
chore(gb200): update dockerfile to handle fp4 disaggregation (#8694)
ishandhanani Aug 2, 2025
89caf7a
[bugfix] Apply routed scaling factor to cutlass_fused_experts_fp8 (#8…
trevor-m Aug 2, 2025
4bec99e
Fix: resolve prefill of retracted request out-of-memory issue when ig…
GaoYusong Aug 2, 2025
ea93079
model: adapt mllama4 to VisionAttention (#8512)
wenchen76 Aug 2, 2025
4ca43b0
Add tensor.detach() back to update weight util (#8691)
hebiao064 Aug 2, 2025
ac6962c
[Doc] Polish sgl-kernel readme for cu126 build error (#8704)
FlamingoPg Aug 2, 2025
f9f0138
Revert "[1/2] sgl-kernel: Fuse routed scaling factor into select_expe…
hnyls2002 Aug 2, 2025
6d4fd88
[router] minor code clean up and and refactoring (#8711)
slin1237 Aug 2, 2025
603f5ce
[Bug] fix green context's incompatibility with `cuda < 12.4` (#8701)
hnyls2002 Aug 2, 2025
0a56b72
chore: bump sgl-kernel v0.2.9 (#8713)
zhyncs Aug 2, 2025
403566b
Remove assertions about per group quant fp8 (#8717)
fzyzcjy Aug 3, 2025
e314b08
[FIX] Fix the nightly CI by disabling swa mem pool for gemma2 (#8693)
merrymercy Aug 3, 2025
8ada1ab
Fix triton moe error caused by TopK refactor (#8705)
fzyzcjy Aug 3, 2025
828a4fe
[router] Implement HTTP Dependency Injection Pattern for Router Syste…
slin1237 Aug 3, 2025
e273aa6
[Feature] Radix Tree in C++ (#7369)
DarkSharpness Aug 3, 2025
d9def43
[Perf]Use Cooperative Schedule for H100 & H200 & H800 in fp8_blockwis…
HydraQYH Aug 3, 2025
9f47d68
Fix fused MoE when `routed_scaling_factor is None` (#8709)
hnyls2002 Aug 3, 2025
0e612db
Tiny fix CI pytest error (#8524)
fzyzcjy Aug 3, 2025
a437aa9
[hotfix] fix mixtral with tensor-level compressed-tensor quantization…
ch-wan Aug 3, 2025
8675bdf
Support limiting max loaded loras in CPU. (#8650)
lifuhuang Aug 3, 2025
0305c50
Reduce memory accumulation in long-running server (#8306)
Edenzzzz Aug 3, 2025
b0add2d
HiCache storage, style change and bug fix (#8719)
xiezhq-hermann Aug 3, 2025
f7b2853
[feat] support minimum token load balance in dp attention (#7379)
WANG-GH Aug 3, 2025
32f2815
Do layernorm before allgather for DP attention (#8631)
trevor-m Aug 3, 2025
7ed8e51
[fix] Fix divide by zero error for llama4. (#8683)
shenoyvvarun Aug 3, 2025
a31b7a7
feat: Add new moe triton for NVIDIA RTX 6000 Ada (#8547)
17Reset Aug 3, 2025
6f9baf1
[Improvements] Merge health check route (#8444)
whybeyoung Aug 3, 2025
5ce5093
chore: bump sgl-kernel 0.3.0 with torch 2.8.0 (#8718)
zhyncs Aug 3, 2025
7a91330
Save cuda graph memory for fa3 (#8567)
ch-wan Aug 3, 2025
cb099d2
[CUDA Graph] save cuda graph memory by using next_token_logits_buffer…
ch-wan Aug 3, 2025
0e0eef0
[DP] fix the compatibility issue between DP attention and `--attentio…
ch-wan Aug 3, 2025
8cd3445
chore: bump v0.4.10.post2 (#8727)
zhyncs Aug 3, 2025
00da906
feat: Support DP Attention for step3_vl (#8699)
yhyang201 Aug 3, 2025
3435a24
[RL] fix update weight for FusedMoE with EP (#8676)
zhuzilin Aug 3, 2025
760286e
use fp32 for e_score_correction_bias in GLM-4.5 (#8729)
zRzRzRzRzRzRzR Aug 3, 2025
0242bb9
Fix triton kernels topk with keyword arguments (#8732)
ispobock Aug 3, 2025
e67276e
feat: support cutlass_moe_fp8 kernel for fusedmoe in sm90 (#8678)
TianQiLin666666 Aug 3, 2025
ed6f759
Fix the missing 'lof' choice of --schedule-policy server args (#7114)
acelyc111 Aug 3, 2025
76ba5bb
fix args typo in memory_pool_host (#8662)
huangtingwei9988 Aug 3, 2025
7a27e79
[CI] Do not trigger pd-disaggregation CI in draft PR (#8737)
hnyls2002 Aug 3, 2025
b102353
[MoE] Enable `renormalize=False` in Triton kernels (#8735)
ch-wan Aug 4, 2025
f024795
Replace torch.jit.script with torch.compile in get_masked_input_and_m…
YyWangCS Aug 4, 2025
3b87a9e
Fix bug of refactoring TopKOutput in w4afp8 (#8745)
yuan-luo Aug 4, 2025
f2d68de
Rename lora_path to lora_id in batches (#8437)
Fridge003 Aug 4, 2025
f57d2dc
[sgl-kernel] avoid per_token_quant_fp8.cu hardcode sm_count (#8738)
BBuf Aug 4, 2025
fee0ab0
[CI] Ascend NPU CI enhancement (#8294)
iforgetmyname Aug 4, 2025
36fc926
[bugfix] fix import path in HiCacheController (#8749)
lbh2001 Aug 4, 2025
915140f
[NVIDIA] Add Low Latency NVFP4 decode kernels from Flashinfer (#8552)
azhurkevich Aug 4, 2025
2fa0462
[router] introduce dp worker abstraction (#8639)
slin1237 Aug 4, 2025
9bd4872
[bugfix] Fix typo in modelopt quant: 'FusedMoE' object has no attribu…
trevor-m Aug 4, 2025
fc8c8e5
Integrate triton_kernels in sgl-kernel (#8762)
Qiaolin-Yu Aug 4, 2025
02bc1c7
chore: bump sgl-kernel v0.3.1 (#8771)
zhyncs Aug 4, 2025
6d0646d
[NVIDIA] Fix breakage of using trtllm-gen fp8 moe (#8773)
kaixih Aug 4, 2025
7cb2075
[Fix] Fix several issues preventing gemma3n LoRA support. (#8776)
lifuhuang Aug 5, 2025
d4bf5a8
Support OCP MXFP4 quantization on AMD GPUs (#8255)
kkHuang-amd Aug 5, 2025
08f8f49
[CPU][sgl-kernel] biased_grouped_topk: fix correction_bias dtype to f…
chunyuan-w Aug 5, 2025
d98a491
[PD] Refactor parallel sizes and add pp support for mooncake (#8571)
ShangmingCai Aug 5, 2025
354ac43
[pd-router] Add Configurable Retry Logic for reduce backend pressure …
slin1237 Aug 5, 2025
1ea94d3
chore: upgrade flashinfer v0.2.9 (#8780)
zhyncs Aug 5, 2025
b01eeb8
[NVIDIA]Fix local_num_experts for EP (#8779)
wenscarl Aug 5, 2025
873f384
[feat] Add detail in image_data (#8596)
yuhyao Aug 5, 2025
5e91fed
Revert "[NVIDIA]Fix local_num_experts for EP (#8779)" (#8797)
zhyncs Aug 5, 2025
194561f
feat: support sgl-kernel cu129 (#8800)
zhyncs Aug 5, 2025
75df31b
chore: bump sgl-kernel v0.3.2 (#8802)
zhyncs Aug 5, 2025
40e3b2b
feat: add trtllm-gen mha from direct call (#8782)
yyihuang Aug 5, 2025
a4b0d5c
GLM-4.5 and GLM-4.5-Air both support (#8804)
zRzRzRzRzRzRzR Aug 5, 2025
8e8545c
fix: update cmake (#8817)
zhyncs Aug 5, 2025
901ab75
chore: upgrade transformers 4.55.0 (#8823)
zhyncs Aug 5, 2025
4f4e0e4
chore: upgrade flashinfer 0.2.10 (#8827)
zhyncs Aug 5, 2025
32d9e39
Fix potential memory fault issue and ncclSystemError in CI test (#8681)
kkHuang-amd Aug 5, 2025
4ef4783
feat: use py312 (#8832)
zhyncs Aug 5, 2025
556e414
fix: remove unused import (#8809)
zhyncs Aug 5, 2025
c1d2061
Add initial support for gpt-oss (#8824)
Ying1123 Aug 5, 2025
3ae8e3e
chore: upgrade torch 2.8.0 (#8836)
zhyncs Aug 6, 2025
5d62b56
[router] complete router oai spec (#8828)
slin1237 Aug 6, 2025
8128e08
Turn off hybrid cache by default (#8839)
ispobock Aug 6, 2025
d26ca84
Support bailing moe (#8680)
ppraneth Aug 6, 2025
ca47e24
[Feature] improve TBO: two chunk overlap (#8144)
House-West Aug 6, 2025
8c7bb39
[router] PD Router Simplification and Reorganization (#8838)
slin1237 Aug 6, 2025
8958817
[1/3] Optimize Slime Update Weights: Remove QWen3MOE Load Weight Over…
hebiao064 Aug 6, 2025
cbbb738
[2/3] Optimize Slime Update Weights: Avoid GPU-to-CPU Device Sync wh…
hebiao064 Aug 6, 2025
168033d
Support mxfp4 for GPT-OSS (#8843)
Ying1123 Aug 6, 2025
4fc5f2f
Add unit test for triton swa kernel (#8853)
ispobock Aug 6, 2025
aeac900
fix: resolve ci issue (#8859)
zhyncs Aug 6, 2025
1bd5316
fix benchmark fp8 blockwise group gemm (#8815)
yuan-luo Aug 6, 2025
399e7ec
Refine naming (#8868)
ispobock Aug 6, 2025
0475448
Optimize triton swa kernel by skipping computation (#8860)
ispobock Aug 6, 2025
b114a81
Support B200 in CI (#8861)
fzyzcjy Aug 6, 2025
01c99a9
chore: update Dockerfile (#8872)
mickqian Aug 6, 2025
288ae41
[NVIDIA] Fix num_experts in modelopt_quant (#8811)
wenscarl Aug 6, 2025
78aad91
[CI] fix pip upgrade (#8881)
ch-wan Aug 6, 2025
cbbd685
chore: use torch 2.8 stable (#8880)
zhyncs Aug 6, 2025
92cc32d
Support v1/responses and use harmony in serving_chat (#8837)
CatherineSue Aug 6, 2025
c0e8429
Use reduce scatter for DP (#8539)
trevor-m Aug 6, 2025
4373df5
add flashinfer mxfp4 (#8847)
BBuf Aug 6, 2025
5b6acc1
fix glm4 moe (#8883)
ch-wan Aug 7, 2025
6ad6c8c
feat: openai oss attention sink support with trtllm-gen backend #8825…
yyihuang Aug 7, 2025
6210e2c
Support GPU pinning for LoRA (#8697)
lifuhuang Aug 7, 2025
3fa3c6c
Enables force reasoning based on chat template for Qwen3-Thinking (#8…
JustinTong0323 Aug 7, 2025
4f2e149
[AMD] Pull latest SGLang version for AMD CI (#8787)
michaelzhang-ai Aug 7, 2025
29 changes: 13 additions & 16 deletions .github/CODEOWNERS
@@ -1,23 +1,20 @@
/3rdparty/amd @HaiShaw
.github @merrymercy @zhyncs
/docker @zhyncs @HaiShaw @ByronHsu
/docs @zhaochenyang20
/python/sglang/lang @merrymercy @Ying1123 @hnyls2002 @ByronHsu
/python/sglang/srt @merrymercy @Ying1123 @hnyls2002 @zhyncs @ispobock @ByronHsu
/python/pyproject.toml @merrymercy @zhyncs
/python/sglang/* @merrymercy @Ying1123 @zhyncs @hnyls2002
/python/sglang/srt/constrained @hnyls2002
/python/sglang/srt/disaggregation @hnyls2002 @ByronHsu
/python/sglang/srt/distributed @yizhang2077
/python/sglang/srt/entrypoints @zhaochenyang20
/python/sglang/srt/entrypoints/openai @merrymercy @Ying1123 @hnyls2002 @zhyncs @ispobock @ByronHsu @CatherineSue
/python/sglang/srt/layers @merrymercy @Ying1123 @zhyncs @ispobock @HaiShaw @ch-wan @BBuf
/python/sglang/srt/disaggregation @ByronHsu @hnyls2002
/python/sglang/srt/disaggregation/mooncake @ShangmingCai
/python/sglang/srt/distributed @yizhang2077 @merrymercy
/python/sglang/srt/entrypoints @ispobock @CatherineSue @slin1237 @merrymercy
/python/sglang/srt/eplb @fzyzcjy
/python/sglang/srt/function_call @CatherineSue
/python/sglang/srt/layers @merrymercy @Ying1123 @zhyncs @ispobock @HaiShaw @ch-wan @BBuf @kushanam
/python/sglang/srt/lora @Ying1123 @Fridge003
/python/sglang/srt/managers @merrymercy @Ying1123 @hnyls2002 @xiezhq-hermann
/python/sglang/srt/mem_cache @merrymercy @Ying1123 @hnyls2002 @xiezhq-hermann
/python/sglang/srt/model_executor @merrymercy @Ying1123 @hnyls2002 @zhyncs @ispobock
/python/sglang/srt/models @merrymercy @Ying1123 @hnyls2002 @zhyncs @ispobock @ByronHsu @zhaochenyang20
/python/sglang/srt/sampling @merrymercy @hnyls2002
/python/sglang/srt/speculative @Ying1123 @merrymercy @rkooo567 @kssteven418
/python/sglang/srt/multimodal @mickqian @JustinTong0323
/test/lang @merrymercy @Ying1123 @ByronHsu
/test/srt @merrymercy @Ying1123 @zhyncs
/sgl-router @ByronHsu @Ying1123 @slin1237
/sgl-kernel @zhyncs @ispobock @HandH1998 @BBuf @yizhang2077 @merrymercy @yinfan98 @HaiShaw
/python/sglang/srt/speculative @Ying1123 @merrymercy @rkooo567 @kssteven418
/sgl-kernel @zhyncs @ispobock @HandH1998 @BBuf @yizhang2077 @merrymercy @FlamingoPg @HaiShaw
/sgl-router @slin1237 @ByronHsu
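In the CODEOWNERS file above, more specific paths (e.g. `/python/sglang/srt/eplb`) appear after broader ones (e.g. `/python/sglang/srt`) because GitHub resolves ownership with last-match-wins semantics: for each changed file, the last rule whose pattern matches determines the reviewers. A minimal shell sketch of that lookup, simplified to anchored directory prefixes only (real CODEOWNERS also supports globs and unanchored patterns), using two rules taken from the file above:

```shell
# Hypothetical changed file to resolve owners for.
path="python/sglang/srt/eplb/expert_location.py"

owners=""
# Read rules in file order; a later matching rule overwrites an earlier one.
while read -r pattern rule_owners; do
  case "/$path" in
    "$pattern" | "$pattern"/*) owners="$rule_owners" ;;
  esac
done <<'EOF'
/python/sglang/srt @merrymercy @Ying1123 @hnyls2002 @zhyncs @ispobock @ByronHsu
/python/sglang/srt/eplb @fzyzcjy
EOF

echo "owners for $path: $owners"
```

Here both rules match, but the second (more specific) one is listed later, so `@fzyzcjy` wins — which is why ordering within CODEOWNERS matters as much as the patterns themselves.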
8 changes: 8 additions & 0 deletions .github/pull_request_template.md
@@ -8,6 +8,14 @@

<!-- Describe the changes made in this PR. -->

## Accuracy Test

<!-- If this PR affects model-side code (e.g., kernels, model architecture), please provide accuracy test results. Ref: https://docs.sglang.ai/references/accuracy_evaluation.html -->

## Benchmark & Profiling

<!-- If this PR is expected to impact performance, please provide benchmark and profiling results. Ref: https://docs.sglang.ai/references/benchmark_and_profiling.html -->

## Checklist

- [ ] Format your code according to the [Code Formatting with Pre-Commit](https://docs.sglang.ai/references/contribution_guide.html#code-formatting-with-pre-commit).
2 changes: 1 addition & 1 deletion .github/workflows/pr-test-amd.yml
@@ -291,7 +291,7 @@ jobs:
bash scripts/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --timeout-per-file 3600

- name: Run CustomAllReduce test
- timeout-minutes: 10
+ timeout-minutes: 20
run: |
bash scripts/amd_ci_exec.sh -e CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 -m unittest test_custom_allreduce.TestCustomAllReduce

130 changes: 130 additions & 0 deletions .github/workflows/pr-test-npu.yml
@@ -0,0 +1,130 @@
name: PR Test (Ascend NPU)

on:
push:
branches: [ main ]
paths:
- "python/**"
- "scripts/**"
- "test/**"
- ".github/workflows/pr-test-npu.yml"
pull_request:
branches: [ main ]
paths:
- "python/**"
- "scripts/**"
- "test/**"
- ".github/workflows/pr-test-npu.yml"
workflow_dispatch:

concurrency:
group: pr-test-npu-${{ github.ref }}
cancel-in-progress: true

jobs:
per-commit-1-ascend-npu:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
github.event.pull_request.draft == false
runs-on: linux-arm64-npu-1
container:
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1.alpha003-910b-ubuntu22.04-py3.11
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Install dependencies
run: |
bash scripts/npu_ci_install_dependency.sh
# copy required file from our daily cache
cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
# copy download through proxy
curl -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl

- name: Run test
timeout-minutes: 30
env:
SGLANG_USE_MODELSCOPE: true
SGLANG_IS_IN_CI: true
HF_ENDPOINT: https://hf-mirror.com
TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
run: |
cd test/srt
python3 run_suite.py --suite per-commit-1-ascend-npu

per-commit-2-ascend-npu:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
github.event.pull_request.draft == false
runs-on: linux-arm64-npu-2
container:
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1.alpha003-910b-ubuntu22.04-py3.11
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Install dependencies
run: |
bash scripts/npu_ci_install_dependency.sh
# copy required file from our daily cache
cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
# copy download through proxy
curl -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl

- name: Run test
timeout-minutes: 30
env:
SGLANG_USE_MODELSCOPE: true
SGLANG_IS_IN_CI: true
HF_ENDPOINT: https://hf-mirror.com
TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
run: |
cd test/srt
python3 run_suite.py --suite per-commit-2-ascend-npu

per-commit-4-ascend-npu:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
github.event.pull_request.draft == false
runs-on: linux-arm64-npu-4
container:
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1.alpha003-910b-ubuntu22.04-py3.11
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Install dependencies
run: |
bash scripts/npu_ci_install_dependency.sh
# copy required file from our daily cache
cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
# copy download through proxy
curl -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl

- name: Run test
timeout-minutes: 30
env:
SGLANG_USE_MODELSCOPE: true
SGLANG_IS_IN_CI: true
HF_ENDPOINT: https://hf-mirror.com
TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
run: |
cd test/srt
python3 run_suite.py --suite per-commit-4-ascend-npu --timeout-per-file 3600

finish:
if: always()
needs:
- per-commit-1-ascend-npu
- per-commit-2-ascend-npu
- per-commit-4-ascend-npu
runs-on: ubuntu-latest
steps:
- name: Check all dependent job statuses
run: |
results=(${{ join(needs.*.result, ' ') }})
for result in "${results[@]}"; do
if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then
echo "Job failed with result: $result"
exit 1
fi
done
echo "All jobs completed successfully"
exit 0
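The `finish` job above fans in the three NPU jobs via `needs` and fails the gate if any dependent job failed or was cancelled (`skipped` and `success` both pass). The same loop can be exercised locally; here a hard-coded array of hypothetical job outcomes stands in for `${{ join(needs.*.result, ' ') }}`:

```shell
# Local simulation of the "finish" gate. In CI, `results` comes from
# `${{ join(needs.*.result, ' ') }}`; these outcomes are hypothetical.
results=(success success skipped)

status=0
for result in "${results[@]}"; do
  # Each dependent job reports success, failure, cancelled, or skipped;
  # only failure and cancelled should fail the gate.
  if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then
    echo "Job failed with result: $result"
    status=1
  fi
done
if [ "$status" -eq 0 ]; then
  echo "All jobs completed successfully"
fi
```

Treating `skipped` as passing is what lets the gate run under `if: always()` without failing when an upstream job is intentionally skipped (e.g. on draft PRs).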