Merged
705 commits
7f3308b
[model-gateway] extract conversation out of oai router (#14440)
slin1237 Dec 4, 2025
7dfcc78
[DeepseekV3.2][NSA][Indexer] Fix PAGED top-k transform for NSA indexe…
YAMY1234 Dec 4, 2025
d8faf2f
[model-gateway] move oai header util to router header util (#14441)
slin1237 Dec 4, 2025
922756a
[FIX] trtllm-moe-fp4-renorm for Qwen series models (#14350)
samuellees Dec 4, 2025
88d1bab
add doc for quantized kv cache (#14348)
b8zhong Dec 4, 2025
0e6441b
fix: Correct environment variable syntax in docker-compose configurat…
yankay Dec 4, 2025
eb85fa6
[model-gateway] move all responses api event from oai to proto (#14446)
slin1237 Dec 4, 2025
29c6c2e
[model-gateway] add mistral 3 image processor (#14445)
slin1237 Dec 4, 2025
c1006fd
[model-gateway] grpc to leverage event type (#14450)
slin1237 Dec 4, 2025
6d37e70
ministral3 (#14251)
JustinTong0323 Dec 4, 2025
2ecee75
[Bug] fix not desired disable fused share experts caused by rocm logi…
ocss884 Dec 5, 2025
b5d3998
Rename secrets.WHL_TOKEN -> secrets.GH_PAT_FOR_WHL_RELEASE (#14421)
sglang-bot Dec 5, 2025
fa0ca97
[diffusion] improve: further optimize model load (#13836)
zyksir Dec 5, 2025
532037d
Add CI permissions for user 'yushengsu-thu' (#14468)
alisonshao Dec 5, 2025
41429a8
[ez] Fix typing (#14473)
Dec 5, 2025
4c5074e
Add AMD stage support to /rerun-stage command and fix related bugs (#…
alisonshao Dec 5, 2025
80a575e
Add YAMY1234 to CI Permission (#14475)
Fridge003 Dec 5, 2025
b76e303
clean up gemlite usage (#14444)
zminglei Dec 5, 2025
beec8ee
[diffusion] chore: further improve model searching logic (#14484)
mickqian Dec 5, 2025
46b05ef
[diffusion] fix: fix bug about pin memory when offloading (#14472)
zyksir Dec 5, 2025
7c744d1
[diffusion] cli: add argument --adjust-frames and --override-protecte…
gmixiaojin Dec 5, 2025
498ea41
dockerfile: add runtime stage + ubuntu 24.04 (#13861)
ishandhanani Dec 5, 2025
35ba6fe
[diffusion] fix: fix CLIP text encoder attention mask not used (#14364)
niehen6174 Dec 5, 2025
2ce121a
Enable RadixCache for Mamba2 models (#13584)
roikoren755 Dec 5, 2025
5347732
[diffusion] fix: Fix profiler trace missing Python stack in diffusion…
BBuf Dec 5, 2025
8fce9e7
support GLM-V vision model dp (#14097)
zRzRzRzRzRzRzR Dec 5, 2025
7235a7f
[misc] add model arch and type to server info and use it for harmony …
slin1237 Dec 5, 2025
205f041
Add Mistral Large 3 Eagle Support (#14466)
elvischenv Dec 5, 2025
6628098
Add Mistral Large 3 to nightly CI tests (#14459)
alisonshao Dec 5, 2025
a890456
[diffusion] chore: set allowing overriding protected fields of sampli…
mickqian Dec 5, 2025
0528437
[model-gateway] move conversation to first class routing (#14506)
slin1237 Dec 5, 2025
889b46e
[Spec] Mamba2 support in target models (#13434)
roikoren755 Dec 5, 2025
66984a8
[diffusion] feat: support cache-dit integration (#14234)
Brain97 Dec 5, 2025
38daa29
Add fused FP8 KV cache write kernel for TRTLLM MHA backend (#14093)
harvenstar Dec 5, 2025
5a46fb1
[model-gateway] Add WASM support for middleware (#12471)
tonyluj Dec 5, 2025
1569fc7
[model-gateway] reorganized conversation handler (#14507)
slin1237 Dec 5, 2025
ec7b2c1
tiny remove deprecated endpoint call (#13607)
b8zhong Dec 5, 2025
cf9a774
[model-gateway] fix server info comment (#14508)
slin1237 Dec 5, 2025
16e8463
Add Mistral Large 3 basic test to PR CI (#14460)
alisonshao Dec 5, 2025
e73173b
Fix removing worker will make it healthy forever in prometheus metric…
fzyzcjy Dec 5, 2025
1ea6b74
[model-gateway] Make Tokenizer Builder Aware of Env Vars Like HF_ENDP…
xuwenyihust Dec 5, 2025
49dfa1d
[model-gateway] change sgl-router to sgl-model-gateway (#14312)
slin1237 Dec 5, 2025
aed835e
[model-gateway] fix left over sgl-router names to sgl-model-gateway (…
slin1237 Dec 5, 2025
09376fd
[model-gateway] fix logs in smg workflow (#14513)
slin1237 Dec 5, 2025
b72f026
[model-gateway] fix left over sgl-router names in wasm (#14514)
slin1237 Dec 5, 2025
959a174
[model-gateway] fix code owner for wasm (#14516)
slin1237 Dec 5, 2025
e11f795
chore: bump sgl-kernel version to 0.3.18.post3 (#14427)
sglang-bot Dec 5, 2025
3d1b591
Tiny use trtllm_mha as default when possible (#14291)
fzyzcjy Dec 5, 2025
e41664b
[Docs] Add /rerun-stage command to contribution guide (#14521)
alisonshao Dec 5, 2025
b988c18
Fix safetensors validation to catch corruption after download (#14465)
alisonshao Dec 6, 2025
a0dde90
[CODEOWNER] update codeowner for qwen3-next related (#14522)
hanming-lu Dec 6, 2025
2ac5b98
fix: fix rmsnorm -> layernorm in qwen3 omni (#11791)
vincentzed Dec 6, 2025
d881f31
[diffusion] chore: temporarily upgrade diffusers to make Z-image comp…
mickqian Dec 6, 2025
d30d6b3
[bug] fix notebook to include new keys from model_info (#14528)
slin1237 Dec 6, 2025
7b0c7ad
Revise DP Multi-Modal Encoder Document (#14290)
yhyang201 Dec 6, 2025
d257bf8
[CPU] add mamba fla kernels for Qwen3-next (#12324)
blzheng Dec 6, 2025
42fcf54
Revert "tiny remove deprecated endpoint call" (#14533)
Fridge003 Dec 6, 2025
ea17737
support mtp with deepseek r1 nvfp4 model (#13115)
rainj-me Dec 6, 2025
35a9a07
[diffusion] refactor: simplify sampling params' override logic (#14539)
mickqian Dec 6, 2025
6d41791
[diffusion] perf: add QKV fusion optimization for Flux models (#14505)
BBuf Dec 6, 2025
e12c6b3
[model-gateway][tracing]: implement request tracing using OpenTelemet…
sufeng-buaa Dec 6, 2025
80122e4
[diffusion] lora: fix LoRA dtype handling and weight attribute access…
niehen6174 Dec 6, 2025
3e40c63
fix "GrammarMatcher has terminated after accepting the stop token, bu…
gongwei-130 Dec 6, 2025
bc38847
[1/n] Fix hanging during DeepGemm Warmup (#14493)
Fridge003 Dec 6, 2025
cee93a6
[Bug fix] Add /model_info endpoint to mini_lb (#14535)
alisonshao Dec 6, 2025
e592ee6
[Qwen3-next] remove heuristics and add radix cache kl test (#14520)
hanming-lu Dec 6, 2025
9dfa01a
[Misc]Register and refactor some environs for dpsk-fp4 and DeepEp (#1…
Fridge003 Dec 6, 2025
d2b4247
chore: bump sgl-kernel version to 0.3.18.post3 (#14518)
sglang-bot Dec 6, 2025
5edbe35
Update CI_PERMISSIONS.json (#14552)
harrisonlimh Dec 6, 2025
5f6f550
Update DeepSeek V3 docs to use B200 (#14447)
leejnau Dec 7, 2025
dd91d38
[Doc] Add short explanation on page size (#14557)
b8zhong Dec 7, 2025
ff6e3ea
[docs] Add missing word in argument description (#14205)
almaslof Dec 7, 2025
be4a3ec
support piecewise cuda graph for Olmo models (#14476)
zminglei Dec 7, 2025
32a32cf
Enhance prefill PP node robustness (#14494)
qhsc Dec 7, 2025
91c9c14
DOC update nemo-skills in docs (#14555)
gwarmstrong Dec 7, 2025
6d5d76a
remove unecessary dual stream token threshold from the rest of models…
b8zhong Dec 7, 2025
0e4d879
feat(ci): add framework target to release-docker workflows (#14559)
ishandhanani Dec 7, 2025
3c7886e
Fix attention backend logic for Qwen3-Next on SM100 (#14560)
Chen-0210 Dec 7, 2025
41d61fa
[FLA] Add explicit kernel arguments to kda.py for Kimi Linear support…
alisonshao Dec 7, 2025
e5135b7
Add CUDA kernel size analysis tool for sgl-kernel optimization (#14544)
BBuf Dec 7, 2025
9abcab3
[DLLM] feat: Add threshold based parallel decoding support (#14412)
btw616 Dec 7, 2025
f2b5dcc
Add unit-test-backend-8-gpu-b200 to rerun-stage command (#14569)
alisonshao Dec 7, 2025
26d9500
[apply][2/2] Fused qk_norm_rope for Qwen3-MoE (#13998)
yuan-luo Dec 7, 2025
ae6a663
Add Expert Parallelism (EP) support for kimi-k2-thinking (#13725)
BBuf Dec 7, 2025
88c459c
Tiny remove wrong import from `python.sglang` (#14577)
hnyls2002 Dec 7, 2025
125e17e
Add small model test for spec v2 + dp + trtllm_mla (#14576)
hnyls2002 Dec 7, 2025
c8683ae
[diffusion] cli: profiling utilities support (#14185)
AichenF Dec 7, 2025
f124539
[NPU]LoRA: Adding Torch Native backend (#14132)
vlserov Dec 7, 2025
948b6ac
[BugFix] fix prefixcache performance and accuracy on ascend (#13573)
khalil2ji3mp6 Dec 7, 2025
84efe54
Fix FP8 KV Triton type issue and add regression test (#14553)
harvenstar Dec 7, 2025
f6423b6
Rename TensorRT Model Optimizer to Model Optimizer (#14455)
Edwardf0t1 Dec 7, 2025
3b47973
[CI] Tiny speed up VLM CI (#14517)
b8zhong Dec 7, 2025
673c11b
[Minor] Temporarily skipping deepep large mtp test (#14586)
Fridge003 Dec 7, 2025
b0bbc7f
[model-gateway] extra accumulator and tool handler in oai router (#14…
slin1237 Dec 7, 2025
5e2cda6
[model-gateway] Fixed WASM Security Vulnerability - Execution Timeout…
slin1237 Dec 8, 2025
aff1238
[model-gateway] reorganize metrics, logging, and otel to its own modu…
slin1237 Dec 8, 2025
03b835e
Refactor tuning block wise kernel and opt Qwen/Qwen3-VL-32B-Instruct-…
BBuf Dec 8, 2025
6799847
[CI]Unblock and split spec v2+dp test (#14551)
Fridge003 Dec 8, 2025
b7b7524
[Tool Call] Fix DeepSeekV32Detector skipping functions with no params…
momaek Dec 8, 2025
f57d4fe
[feat] use cachebuffer to store mm feature to speedup hash (#14386)
liusy58 Dec 8, 2025
559202b
[CI] Fix unit-test-backend-8-gpu-b200 running on every /rerun-stage (…
alisonshao Dec 8, 2025
a4ffd66
[model-gateway] fix WASM memory limit per module (#14600)
slin1237 Dec 8, 2025
85d0ccf
Tiny fix missing policy decision recording (#14605)
fzyzcjy Dec 8, 2025
1915a1f
Super tiny remove unneeded policy flag (#14608)
fzyzcjy Dec 8, 2025
8fbf7dd
[model-gateway] refactor otel to be more efficient (#14604)
slin1237 Dec 8, 2025
c08b780
Super tiny remove unused select_worker_pair (#14609)
fzyzcjy Dec 8, 2025
2970f22
[model-gateway] fix WASM unbounded request/response body read vuln (#…
slin1237 Dec 8, 2025
661e977
[2/2] Add rope kernel in sgl-kernel (#14452)
Qiaolin-Yu Dec 8, 2025
36361ad
[DLLM] Add initial cuda graph support (#14203)
btw616 Dec 8, 2025
a2ca9bd
Super tiny fix unused code in router (#14618)
fzyzcjy Dec 8, 2025
cf0478d
[Glm46v] Bug fix for accuracy drop and unable to launch server (#14585)
byjiang1996 Dec 8, 2025
aeff0d3
Fix amd rope definition (#14556)
Qiaolin-Yu Dec 8, 2025
f72a770
modify the sgl-kernel to be compatible with transformers 5.x. (#14625)
yhyang201 Dec 8, 2025
06836ad
[Reasoning + Structured Output] make reasoning compatible with struct…
Muqi1029 Dec 8, 2025
12a08ef
[diffusion] feat: add support for LoRA layers in transformer_2 within…
Prozac614 Dec 8, 2025
4a62a0e
chore: bump sgl-kernel version to 0.3.19 (#14632)
sglang-bot Dec 8, 2025
7871593
[cpu] Implement all gather/reduce for arm64 cpu (#12527)
cyb70289 Dec 8, 2025
80cfca5
[diffusion] chore: further refine output resolution adjustment logic …
mickqian Dec 8, 2025
cb4cdb4
Fix dp-aware incompatible with service-discovery (#14629)
fzyzcjy Dec 8, 2025
8200fb5
update transformers package version to 5.0.0rc0 (#14356)
yhyang201 Dec 8, 2025
2de9801
chore: bump sgl-kernel version to 0.3.19 (#14649)
sglang-bot Dec 8, 2025
9a327bd
chore: bump SGLang version to 0.5.6.post1 (#14651)
sglang-bot Dec 8, 2025
763888b
[AMD] change fused rms quant interface for aiter upgrade (#14497)
yctseng0211 Dec 8, 2025
d69ecc1
[model-gateway] reducing cpu overhead in various of places (#14658)
slin1237 Dec 8, 2025
39f9a9c
[model-gateway] reduce cpu overhead in grpc router (#14663)
slin1237 Dec 8, 2025
7bf16c6
[model-gateway] fix WASM arbitrary file read security vol (#14664)
slin1237 Dec 8, 2025
8810152
vlm: Use fa3 as the default backend for qwen3 vl (#14634)
mickqian Dec 8, 2025
8550822
[model-gateway] Optimize memory usage in HTTP router (#14667)
slin1237 Dec 8, 2025
b9bef31
fix: use .get() when accessing strict mem-check env variable (#14657)
yhyang201 Dec 8, 2025
32f8b60
improve default glm mtp setting (#14457)
b8zhong Dec 8, 2025
2e3946d
Fix cache-aware router should pick min load instead of min tenant siz…
fzyzcjy Dec 8, 2025
6abb805
Bump up diffusers to latest official release version (#14670)
byjiang1996 Dec 8, 2025
edde5e5
[model-gateway] add OTEL integration to grpc router (#14671)
slin1237 Dec 8, 2025
93043f7
[CI] Increase max-parallel to 15 for high priority PRs (#14675)
alisonshao Dec 8, 2025
07404d7
[HiCache] fix condition check when use decode offload (#14489)
ssssnow Dec 8, 2025
eac5b66
[RadixTree] Optimize the Time Complexity of Node Retrieval Operation …
CLFutureX Dec 8, 2025
119fd95
Tiny support printing requests in bench_serving for observability (#1…
fzyzcjy Dec 9, 2025
c106b54
Aiter fp8 kv cache (#13147)
kkHuang-amd Dec 9, 2025
6f65707
[SMG]feat: implement TokenGuardBody for managing token return (#14653)
jimmy-evo Dec 9, 2025
60d36e7
[NPU] chore: bump basic software version to 8.3.rc2 (#14614)
iforgetmyname Dec 9, 2025
e5201bd
[CI] Unblock gb200 cutedsl test (#14469)
Fridge003 Dec 9, 2025
ef3f8c9
Add ffmpeg into sglang docker - required by transformers multimodal V…
byjiang1996 Dec 9, 2025
08da4c2
[Bugfix] Fix KeyError for Mistral-Large-3 rope_scaling config (#14627)
alisonshao Dec 9, 2025
af20657
Tiny support sgl-router http response status code metrics (#14689)
fzyzcjy Dec 9, 2025
e6f0ddd
[CI] Migrate Eagle 1-GPU tests to test/registered/ (#14529)
alisonshao Dec 9, 2025
0e0b0c0
Revert "[Bug] fix not desired disable fused share experts caused by r…
zhyncs Dec 9, 2025
ce4e836
Add per-request decode tp size (#14678)
merrymercy Dec 9, 2025
af60cad
[ci][smg] fix docker release ci and add it to pr test (#14683)
slin1237 Dec 9, 2025
817daba
Tiny extract select_worker_min_load (#14648)
fzyzcjy Dec 9, 2025
da3dc49
Fix dp-aware incompatible with completions and chat completions APIs …
fzyzcjy Dec 9, 2025
0f8bd55
[CI] Fix Llama 3.1 8B FP4 CI (#14699)
b8zhong Dec 9, 2025
b626334
fix: make override DeepseekV2Model work (#14707)
zhyncs Dec 9, 2025
66772aa
chore: add code owners for deepseek_v2.py (#14714)
zhyncs Dec 9, 2025
9a426fc
[CI] Move mistral large 3 basic to nightly (#14622)
alisonshao Dec 9, 2025
f0e948a
fix the deepep 8 gpu unit test (#14601)
rainj-me Dec 9, 2025
53d1708
Add fuse_marlin_moe test to ci and add new ep test (#14686)
BBuf Dec 9, 2025
cef5ba6
[Bugfix] Fix environ error in scheduler_runtime_checker_mixin.py (#14…
llfl Dec 9, 2025
fe7f91e
[Feat] Add received_time in serving_base (#13432)
zhanghaotong Dec 9, 2025
98c430e
fix: prevent HugginqFace access when SGLANG_USE_MODELSCOPE is enabled…
yrk111222 Dec 9, 2025
13680e5
[Test] Skip STANDALONE speculative decoding tests for different hidde…
alisonshao Dec 9, 2025
6ec7768
[diffusion] feat: support comparing batch perf (#14738)
Brain97 Dec 9, 2025
ab00487
Revert "[Feat] Add received_time in serving_base" (#14743)
merrymercy Dec 9, 2025
9496f12
[Model] Add PaddleOCR-VL Model Support (#12953)
yudian0504 Dec 9, 2025
15bc8cb
fix rope parameter initialization error caused by transformers v5.0 u…
yhyang201 Dec 9, 2025
8b98bb7
[model-gateway] optimize core modules (#14751)
slin1237 Dec 9, 2025
73df7a4
[SMG] perf: optimize tokenizer for reduced CPU and memory overhead (#…
slin1237 Dec 9, 2025
55504df
Add FP8 Blockwise GEMM Backend Flag `--fp8-gemm-backend` (#14379)
b8zhong Dec 9, 2025
8b0b6a4
fix: checking if tokenizer is in cache before downloading from HF (#1…
dougyster Dec 9, 2025
7c6fb3a
fix: making rate limit a warning instead of error (#14753)
dougyster Dec 9, 2025
036e64d
move multi-item scoring functions in tokenizer manager into a separat…
merrymercy Dec 9, 2025
18bd8e8
Improve CI by trying a warmup before unit tests (#14669)
merrymercy Dec 9, 2025
9ad02b7
[Perf] Optimize radix tree for cache-aware load balancin (#14758)
slin1237 Dec 9, 2025
0c63fb9
[Feature] Add LoRA support for embedding layers (#14177)
yushengsu-thu Dec 9, 2025
390406c
[model-gateway] release gateway 0.2.4 (#14763)
slin1237 Dec 10, 2025
a6dc7d2
[ci]: Enable the new hf API (#14687)
MingxuZh Dec 10, 2025
cbc7dcd
Re-add the API serving timing metrics. (#14744)
hnyls2002 Dec 10, 2025
c8d74fe
fix: adding rate limit warning at verify token permission stage (#14756)
dougyster Dec 10, 2025
5e8f544
Disable 8-gpu-b200 runner in PR tests (#14768)
alisonshao Dec 10, 2025
f077436
[fix] Fix issues for in-flight weight updates (#14064)
ShawnY112358 Dec 10, 2025
4285e99
[Auto Sync] Update data_parallel_controller.py, detokenizer... (20251…
merrymercy Dec 10, 2025
0183599
fix: race condition between validation and download locks (#14761)
alisonshao Dec 10, 2025
b0f531a
Fix VLM accuracy thresholds for nightly tests (#14777)
alisonshao Dec 10, 2025
b1cbfce
fix server args bug (#14725)
TomerBN-Nvidia Dec 10, 2025
793c98a
handling incomplete rope_scaling config ci after transformers upgrade…
yhyang201 Dec 10, 2025
b0a25d0
fix b200 ci (#14786)
b8zhong Dec 10, 2025
21028b5
[RL] support weight reload for low-bit rollout (#9650)
AniZpZ Dec 10, 2025
6c9c8da
fix: add missing logic for SGLANG_USE_MODELSCOPE variable (#14794)
yrk111222 Dec 10, 2025
56e5c07
fix b200 fa4 ci (#14788)
b8zhong Dec 10, 2025
87dbddd
[diffusion] profile: early exit when enough steps are captured to red…
mickqian Dec 10, 2025
03836d8
[GLM-4.6V] Support Pipeline Parallelism for GLM-4.6V & GLM-4.1V (#14720)
yuan-luo Dec 10, 2025
908c718
[diffusion] CI: Add LoRA support to diffusion server configuration an…
Prozac614 Dec 10, 2025
02f1e81
Revert "fix: checking if tokenizer is in cache before downloading fro…
yhyang201 Dec 10, 2025
12b7a4f
[diffusion] performance: refactor diffusion fuse qkv and apply to qwe…
BBuf Dec 10, 2025
766476f
[SMG-GO] implement a Go SGLang Model Gateway - OpenAI Compatible API …
whybeyoung Dec 10, 2025
d7f6320
[model-gateway] Dynamically Populate Tool Call Parser Choices (#14807)
xuwenyihust Dec 10, 2025
5eccaf7
Support HTTP response status code prometheus metrics (#14710)
fzyzcjy Dec 10, 2025
6634f67
Fix router keep nonzero metrics after worker is deleted (#14819)
fzyzcjy Dec 10, 2025
d85fecb
Tiny fix incorrect worker removal command (#14822)
fzyzcjy Dec 10, 2025
b8cfa02
[NPU] bug fix for mtp and w4a8 (#14806)
liupeng374 Dec 10, 2025
503880d
[CI] fix UT success check in `test_eagle_infer_beta_dp_attention.py` …
hnyls2002 Dec 10, 2025
f732f8e
Fix CI registry scan to only check test/registered directory (#14812)
alisonshao Dec 10, 2025
2543666
[model-gateway] add anthropic message api spec (#14834)
slin1237 Dec 10, 2025
83e35a7
[diffusion] doc: fix tiny typo in multimodal_gen/README.md (#14830)
wplf Dec 10, 2025
617e9b3
[model-gateway] support customizing Prometheus duration buckets (#14716)
fzyzcjy Dec 10, 2025
3d82c0f
[model-gateway] support engine response http status statistics in rou…
fzyzcjy Dec 10, 2025
1698c23
[CI] Reduce stage-b auto-partition from 4 to 2 (#14769)
alisonshao Dec 10, 2025
5b5571a
Apply back moe_sum_reduce for fused_marlin_moe (#14829)
ispobock Dec 10, 2025
6c5ebc0
[diffusion] parallel: pad tokens for video models under sp (#14833)
mickqian Dec 10, 2025
d659873
[diffusion] CI: use unified sampling_params for CI (#14045)
mickqian Dec 10, 2025
ef1ab23
[Auto Sync] Update tool_chat_template_deepseekv31.jinja (20251210) (#…
zhyncs Dec 10, 2025
c1bd5ee
Revert transformers to 4.57.1 (#14801)
yhyang201 Dec 10, 2025
e99ee0c
[model-gateway] Fix incompatible metric comparison in` PowerOfTwo` po…
ppraneth Dec 10, 2025
0e54a69
[bugfix] qwen25-VL support lora (#14638)
SYChen123 Dec 10, 2025
da9b801
fix lora target all + csgmv backend (#14796)
b8zhong Dec 10, 2025
c032b55
[model-gateway] adds default implementations to RouterTrait in mod.rs…
slin1237 Dec 10, 2025
c97ce39
[AMD] Add model to AMD nightly test (#14442)
michaelzhang-ai Dec 10, 2025
a499287
Treat unittest SkipTest exception as pass instead of as failure (#14847)
byjiang1996 Dec 10, 2025
ccf2602
[model-gateway] code clean up on oai router (#14850)
slin1237 Dec 10, 2025
bcc5483
[model-gateway] fix import order in oai conversation (#14851)
slin1237 Dec 10, 2025
c51efb8
fix fp8 gemm nightly CI (#14844)
b8zhong Dec 10, 2025
b6523a4
fix: restrict cache validation behaviors to CI only (#14849)
alisonshao Dec 11, 2025
25e9738
Fix CUDA version handling in ci_install_deepep.sh (#14854)
merrymercy Dec 11, 2025
312df1d
Fix TestGLM41VPPAccuracy test flakiness (#14848)
byjiang1996 Dec 11, 2025
bd7824b
Minor code style fix for dllm (#14836)
hnyls2002 Dec 11, 2025
7c98533
Enable TP for Mamba-based models (#14811)
roikoren755 Dec 11, 2025
7dcad45
[CI] Temp disable gb200 test (#14865)
Fridge003 Dec 11, 2025
8642dbe
Refactor Marlin MoeRunner (#14554)
trangdough Dec 11, 2025
e54307f
[6/n] Fix `num_token_non_padded` computation in prefill (#14313)
yuchengz816-bot Dec 11, 2025
32829b1
Remove myself to test CI gate issue (#14871)
Kangyan-Zhou Dec 11, 2025
1a96e66
fix: creating blobs only once for publish trace retries (#14845)
dougyster Dec 11, 2025
624725c
Move and update MindSpore docs, make it appear on the online document…
wangtiance Dec 11, 2025
b62fe85
fix nightly vlm ci : restore original eval for requests without regex…
yhyang201 Dec 11, 2025
2856624
Only count limitations for previous runs that reaches the test stage…
Kangyan-Zhou Dec 11, 2025
8348725
[CI][BUG] fix ib setup for disaggregation hicache test (#14877)
luketong777 Dec 11, 2025
a076d75
[Fix] Remove unused import from test_disaggregation_hicache.py (#14880)
ShangmingCai Dec 11, 2025
e52cf30
fix: adding temporary bypass for nightly tests (#14876)
dougyster Dec 11, 2025
f85460f
Avoid deleting entire cache for missing shards (#14754 follow-up) (#1…
alisonshao Dec 11, 2025
a368df2
Tiny add more error info for bench_serving (#14827)
fzyzcjy Dec 11, 2025
45eeeb9
Tiny support range ratio in GSP in bench serving (#14828)
fzyzcjy Dec 11, 2025
fca8e88
[diffusion] feat: enable torch compile to eliminate GPU bubble (#13641)
AichenF Dec 11, 2025
388018a
[NPU] adapt dsv3.2 nsa prefill context parallel (#14541)
liupeng374 Dec 11, 2025
5d804a3
[diffusion] feat: support sageattn & sageattn3 backend (#14878)
mickqian Dec 11, 2025
8f980dc
dsv32 multistream opt
ZhengdQin Dec 4, 2025
8e78e2e
clean code
ZhengdQin Dec 8, 2025
a88f8a2
delete renormalize in topk
ZhengdQin Dec 8, 2025
12afaab
dsv32 use batch_matmul_transpose in MTP
ZhengdQin Dec 8, 2025
28cd3b5
modify comment
ZhengdQin Dec 9, 2025
bf4dee5
Support dynamic w8a8
ZhengdQin Dec 9, 2025
9391ed0
dsv3 support ascend_fuseep
ZhengdQin Dec 11, 2025
f6f61a2
rebase modify
ZhengdQin Dec 11, 2025
516 changes: 378 additions & 138 deletions .github/CI_PERMISSIONS.json


55 changes: 32 additions & 23 deletions .github/CODEOWNERS
@@ -2,41 +2,50 @@
/docker @Fridge003 @ispobock @HaiShaw @ishandhanani
/docker/npu.Dockerfile @ping1jing2 @iforgetmyname
/python/pyproject.toml @merrymercy @Fridge003 @ispobock
/python/sglang/multimodal_gen @mickqian
/python/sglang/multimodal_gen @mickqian @yhyang201
/python/sglang/srt/batch_invariant_ops @Fridge003 @hebiao064
/python/sglang/srt/constrained @hnyls2002 @DarkSharpness
/python/sglang/srt/compilation @hebiao064
/python/sglang/srt/disaggregation @ByronHsu @hnyls2002 @ShangmingCai
/python/sglang/srt/disaggregation/ascend @ping1jing2 @iforgetmyname
/python/sglang/srt/distributed @yizhang2077 @merrymercy @ch-wan
/python/sglang/srt/entrypoints @ispobock @CatherineSue @slin1237 @merrymercy @JustinTong0323
/python/sglang/srt/entrypoints/grpc_server.py @CatherineSue @slin1237
/python/sglang/srt/eplb @fzyzcjy @ch-wan
/python/sglang/srt/function_call @CatherineSue @JustinTong0323
/python/sglang/srt/layers @merrymercy @Ying1123 @Fridge003 @ispobock @HaiShaw @ch-wan @BBuf @kushanam @Edwardf0t1
/python/sglang/srt/grpc @CatherineSue @slin1237
/python/sglang/srt/hardware_backend/npu @ping1jing2 @iforgetmyname
/python/sglang/srt/layers @merrymercy @Ying1123 @Fridge003 @ispobock @HaiShaw @ch-wan @BBuf @Edwardf0t1
/python/sglang/srt/layers/attention/fla @yizhang2077 @hebiao064
/python/sglang/srt/layers/attention/hybrid_linear_attn_backend.py @yizhang2077 @hebiao064 @hanming-lu
/python/sglang/srt/layers/attention/mamba @yizhang2077 @hebiao064
/python/sglang/srt/layers/quantization @ch-wan @BBuf @Edwardf0t1 @FlamingoPg @AniZpZ
/python/sglang/srt/layers/attention/ascend_backend.py @ping1jing2 @iforgetmyname
/python/sglang/srt/lora @Ying1123 @Fridge003 @lifuhuang
/python/sglang/srt/managers @merrymercy @Ying1123 @hnyls2002 @xiezhq-hermann @zhyncs
/python/sglang/srt/mem_cache @merrymercy @Ying1123 @hnyls2002 @xiezhq-hermann
/python/sglang/srt/mem_cache/allocator_ascend.py @ping1jing2 @iforgetmyname
/python/sglang/srt/mem_cache @merrymercy @Ying1123 @hnyls2002 @xiezhq-hermann @hanming-lu @yizhang2077
/python/sglang/srt/model_executor @merrymercy @Ying1123 @hnyls2002 @Fridge003 @ispobock
/python/sglang/srt/model_executor/npu_graph_runner.py @ping1jing2 @iforgetmyname
/python/sglang/srt/multimodal @mickqian @JustinTong0323 @yhyang201
/python/sglang/srt/model_executor/piecewise_cuda_graph_runner.py @hebiao064
/python/sglang/srt/models/deepseek_v2.py @fzyzcjy @zhyncs @ispobock @ch-wan @merrymercy @Fridge003
/python/sglang/srt/multimodal @mickqian @JustinTong0323 @yhyang201 @yuan-luo
/python/sglang/srt/speculative @Ying1123 @merrymercy @hnyls2002
/sgl-kernel @zhyncs @ispobock @BBuf @yizhang2077 @merrymercy @FlamingoPg @HaiShaw
/sgl-router @slin1237 @ByronHsu @CatherineSue
/sgl-router/benches @slin1237
/sgl-router/py_src @CatherineSue @key4ng @slin1237
/sgl-router/py_test @CatherineSue @key4ng
/sgl-router/src/config @slin1237
/sgl-router/src/core @slin1237
/sgl-router/src/data_connector @key4ng
/sgl-router/src/grpc_client @CatherineSue @slin1237
/sgl-router/src/mcp @key4ng @slin1237
/sgl-router/src/policies @slin1237 @ByronHsu
/sgl-router/src/proto @CatherineSue @slin1237
/sgl-router/src/protocols @CatherineSue @key4ng
/sgl-router/src/reasoning_parser @CatherineSue
/sgl-router/src/routers @CatherineSue @key4ng @slin1237
/sgl-router/src/tokenizer @slin1237 @CatherineSue
/sgl-router/src/tool_parser @slin1237 @CatherineSue
/sgl-model-gateway @slin1237 @CatherineSue
/sgl-model-gateway/benches @slin1237
/sgl-model-gateway/bindings/python @CatherineSue @key4ng @slin1237
/sgl-model-gateway/py_test @CatherineSue @key4ng
/sgl-model-gateway/src/config @slin1237
/sgl-model-gateway/src/core @slin1237
/sgl-model-gateway/src/data_connector @key4ng
/sgl-model-gateway/src/grpc_client @CatherineSue @slin1237
/sgl-model-gateway/src/mcp @key4ng @slin1237
/sgl-model-gateway/src/policies @slin1237 @ByronHsu
/sgl-model-gateway/src/proto @CatherineSue @slin1237
/sgl-model-gateway/src/protocols @CatherineSue @key4ng
/sgl-model-gateway/src/reasoning_parser @CatherineSue
/sgl-model-gateway/src/routers @CatherineSue @key4ng @slin1237
/sgl-model-gateway/src/tokenizer @slin1237 @CatherineSue
/sgl-model-gateway/src/tool_parser @slin1237 @CatherineSue
/sgl-model-gateway/src/wasm @slin1237
/sgl-model-gateway/examples/wasm @slin1237
/test/srt/ascend @ping1jing2 @iforgetmyname
/test/srt/test_modelopt* @Edwardf0t1
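The CODEOWNERS diff above renames every `/sgl-router/...` entry to `/sgl-model-gateway/...` and keeps the broad directory rule ahead of the more specific `src/...` rules. That ordering matters because GitHub resolves CODEOWNERS with last-match-wins semantics: the last pattern in the file that matches a path takes precedence. A minimal Python sketch of that resolution — the rule table is a hypothetical subset of the entries above, and the matcher is a simplification, not GitHub's actual glob engine:

```python
# Minimal sketch of CODEOWNERS resolution (not GitHub's implementation):
# the last matching pattern in the file takes precedence, which is why
# broad directory rules come before more specific sub-path rules.
# RULES is a hypothetical subset of the entries in the diff above.
RULES = [
    ("/sgl-model-gateway", ["@slin1237", "@CatherineSue"]),
    ("/sgl-model-gateway/src/policies", ["@slin1237", "@ByronHsu"]),
]

def owners_for(path: str) -> list[str]:
    matched: list[str] = []
    for pattern, owners in RULES:  # file order; later matches override
        if path == pattern or path.startswith(pattern + "/"):
            matched = owners
    return matched
```

Under these rules a change to `/sgl-model-gateway/src/policies/round_robin.rs` requests review from the policies owners, while any other file under `/sgl-model-gateway/` falls back to the directory-wide pair.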
6 changes: 3 additions & 3 deletions .github/MAINTAINER.md
@@ -25,19 +25,19 @@ __Note__: Difference between Merge Oncall and Codeowner
- The Codeowner is a passive protection role provided by GitHub; it prevents accidental changes to critical code.
- The list of Merge Oncalls is attached below. The list of Codeowners is in the [CODEOWNERS](./CODEOWNERS) file.

__Note__: The permissions to trigger CI tests are defined separately according to these [rules](https://docs.sglang.ai/developer_guide/contribution_guide.html#how-to-trigger-ci-tests).
__Note__: The permissions to trigger CI tests are defined separately according to these [rules](https://docs.sglang.io/developer_guide/contribution_guide.html#how-to-trigger-ci-tests).


## Pull Request Merge Process
1. The author submits a pull request (PR) and fills out the PR checklist.
2. A bot assigns this PR to a Merge Oncall and @-mentions them. At the same time, GitHub will automatically request reviews from Codeowners.
3. Someone tags the PR with a `run-ci` label ([help](https://docs.sglang.ai/developer_guide/contribution_guide.html#how-to-trigger-ci-tests)). Then the author can trigger CI by pushing new commits.
3. Someone tags the PR with a `run-ci` label ([help](https://docs.sglang.io/developer_guide/contribution_guide.html#how-to-trigger-ci-tests)). Then the author can trigger CI by pushing new commits.
4. The Merge Oncall coordinates the review (e.g., asking people to review) and approves the PR; the Codeowners also approve the PR. If the assigned Merge Oncall is not responsive, the author can ping other related Merge Oncalls and Reviewers in the list below.
5. The code can now be merged:
- **Ideal case:** For each modified file, one Codeowner has approved the PR. The PR has also passed the required CI tests. Then, anyone with write permission can merge the PR.
- **Exception:** In cases where it is difficult to meet all requirements (due to flaky CI or slow responses), a Merge Oncall can bypass branch protection to merge the PR.

If you meet any issues during the merge, you can discuss in [slack channels](https://slack.sglang.ai/): #dev, #pull-request, and #ci-cd-build-release.
If you meet any issues during the merge, you can discuss in [slack channels](https://slack.sglang.io/): #dev, #pull-request, and #ci-cd-build-release.

## The List of Merge Oncalls and Reviewers
The format is @github-username (Slack username).
35 changes: 33 additions & 2 deletions .github/labeler.yml
@@ -1,10 +1,10 @@
# Configuration for the GitHub Labeler action
# Automatically adds labels to PRs based on the files changed

# Router specific (Rust code in sgl-router)
# Router specific (Rust code in sgl-model-gateway)
model-gateway:
- changed-files:
- any-glob-to-any-file: 'sgl-router/**/*'
- any-glob-to-any-file: 'sgl-model-gateway/**/*'

# Kernel specific
sgl-kernel:
@@ -40,6 +40,11 @@ Multi-modal:
- '**/*vision*'
- '**/*vlm*'

# Diffusion
diffusion:
- changed-files:
- any-glob-to-any-file: 'python/sglang/multimodal_gen/**/*'

# LoRA
lora:
- changed-files:
@@ -66,6 +71,22 @@ amd:
- '**/*amd*'
- '**/*rocm*'

# NPU specific
npu:
- changed-files:
- any-glob-to-any-file:
- '**/*npu*'
- '**/*ascend*'

# Blackwell
blackwell:
- changed-files:
- any-glob-to-any-file:
- '**/*nvfp4*'
- 'sgl-kernel/csrc/attention/cutlass_sm100_mla/**/*'
- 'python/sglang/srt/layers/attention/trtllm_mla_backend.py'
- 'python/sglang/srt/layers/attention/trtllm_mha_backend.py'

# DeepSeek specific
deepseek:
- changed-files:
@@ -77,3 +98,13 @@ hicache:
- changed-files:
- any-glob-to-any-file:
- '**/*hicache*'

# Deterministic
deterministic:
- changed-files:
- any-glob-to-any-file: 'python/sglang/srt/batch_invariant_ops/**/*'

# Piecewise CUDA Graph
piecewise-cuda-graph:
- changed-files:
- any-glob-to-any-file: 'python/sglang/srt/compilation/**/*'
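The new `diffusion`, `npu`, `deterministic`, and `piecewise-cuda-graph` entries above drive automatic PR labeling from the paths of changed files. A rough Python approximation of that classification — the real `actions/labeler` action evaluates these globs with minimatch in JavaScript, so `fnmatch` here is only an illustrative stand-in with slightly different `**` semantics:

```python
# Sketch of how the new labeler globs classify changed files.
# fnmatch is an approximation: the actual action uses minimatch,
# whose "**" can also match zero path segments.
from fnmatch import fnmatch

LABEL_GLOBS = {
    "diffusion": ["python/sglang/multimodal_gen/**/*"],
    "npu": ["**/*npu*", "**/*ascend*"],
    "deterministic": ["python/sglang/srt/batch_invariant_ops/**/*"],
    "piecewise-cuda-graph": ["python/sglang/srt/compilation/**/*"],
}

def labels_for(changed_file: str) -> set[str]:
    """Return every label whose glob list matches the changed file."""
    return {
        label
        for label, globs in LABEL_GLOBS.items()
        if any(fnmatch(changed_file, g) for g in globs)
    }
```

A single changed file can pick up several labels at once, since each label's globs are evaluated independently.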
12 changes: 6 additions & 6 deletions .github/pull_request_template.md
@@ -1,4 +1,4 @@
<!-- Thank you for your contribution! Please follow these guidelines to enhance your pull request. If anything is unclear, submit your PR and reach out to maintainers for assistance. Join our Slack community at https://slack.sglang.ai to discuss further. -->
<!-- Thank you for your contribution! Please follow these guidelines to enhance your pull request. If anything is unclear, submit your PR and reach out to maintainers for assistance. Join our Slack community at https://slack.sglang.io to discuss further. -->

## Motivation

@@ -18,9 +18,9 @@

## Checklist

- [ ] Format your code according to the [Format code with pre-commit](https://docs.sglang.ai/developer_guide/contribution_guide.html#format-code-with-pre-commit).
- [ ] Add unit tests according to the [Run and add unit tests](https://docs.sglang.ai/developer_guide/contribution_guide.html#run-and-add-unit-tests).
- [ ] Update documentation according to [Write documentations](https://docs.sglang.ai/developer_guide/contribution_guide.html#write-documentations).
- [ ] Provide accuracy and speed benchmark results according to [Test the accuracy](https://docs.sglang.ai/developer_guide/contribution_guide.html#test-the-accuracy) and [Benchmark the speed](https://docs.sglang.ai/developer_guide/contribution_guide.html#benchmark-the-speed).
- [ ] Follow the SGLang code style [guidance](https://docs.sglang.ai/developer_guide/contribution_guide.html#code-style-guidance).
- [ ] Format your code according to the [Format code with pre-commit](https://docs.sglang.io/developer_guide/contribution_guide.html#format-code-with-pre-commit).
- [ ] Add unit tests according to the [Run and add unit tests](https://docs.sglang.io/developer_guide/contribution_guide.html#run-and-add-unit-tests).
- [ ] Update documentation according to [Write documentations](https://docs.sglang.io/developer_guide/contribution_guide.html#write-documentations).
- [ ] Provide accuracy and speed benchmark results according to [Test the accuracy](https://docs.sglang.io/developer_guide/contribution_guide.html#test-the-accuracy) and [Benchmark the speed](https://docs.sglang.io/developer_guide/contribution_guide.html#benchmark-the-speed).
- [ ] Follow the SGLang code style [guidance](https://docs.sglang.io/developer_guide/contribution_guide.html#code-style-guidance).
- [ ] Work with maintainers to merge your PR. See the [PR Merge Process](https://github.com/sgl-project/sglang/blob/main/.github/MAINTAINER.md#pull-request-merge-process)
10 changes: 10 additions & 0 deletions .github/workflows/auto-tune.yml
@@ -0,0 +1,10 @@
name: Auto tune

on:
workflow_dispatch:

jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
13 changes: 13 additions & 0 deletions .github/workflows/bot-bump-kernel-version-to-sglang.yml
@@ -10,6 +10,9 @@ permissions:
jobs:
bump-kernel-version-to-sglang:
runs-on: ubuntu-latest
outputs:
branch_name: ${{ steps.set_output.outputs.branch_name }}
needs_sync: ${{ steps.check_sync.outputs.needs_sync }}
steps:
- name: Checkout code
uses: actions/checkout@v4
@@ -32,6 +35,7 @@ jobs:

- name: Configure Git and branch
if: steps.check_sync.outputs.needs_sync == 'true'
id: set_output
run: |
git config user.name "sglang-bot"
git config user.email "sglang-bot@users.noreply.github.com"
@@ -41,6 +45,7 @@
git checkout -b "$BRANCH_NAME"
echo "BRANCH_NAME=$BRANCH_NAME" >> $GITHUB_ENV
echo "KERNEL_VERSION=$KERNEL_VERSION" >> $GITHUB_ENV
echo "branch_name=$BRANCH_NAME" >> $GITHUB_OUTPUT

- name: Run kernel version bump script
if: steps.check_sync.outputs.needs_sync == 'true'
@@ -53,3 +58,11 @@ jobs:
GH_TOKEN: ${{ secrets.GH_PAT_FOR_PULL_REQUEST }}
run: |
bash scripts/release/commit_and_pr_kernel_to_sglang.sh "$KERNEL_VERSION" "$BRANCH_NAME"

run-nightly-tests:
needs: bump-kernel-version-to-sglang
if: needs.bump-kernel-version-to-sglang.outputs.needs_sync == 'true'
uses: ./.github/workflows/nightly-test-nvidia.yml
with:
ref: ${{ needs.bump-kernel-version-to-sglang.outputs.branch_name }}
secrets: inherit
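Calling `nightly-test-nvidia.yml` via `uses:` requires that workflow to declare a `workflow_call` trigger accepting the `ref` input passed above. That callee side is not shown in this diff; a minimal sketch of what it would need (the checkout fallback logic is an assumption):

```yaml
# Hypothetical trigger block for nightly-test-nvidia.yml, the reusable
# workflow invoked above. The input name matches the caller's `with: ref`.
on:
  workflow_call:
    inputs:
      ref:
        description: "Git ref to check out for the nightly run"
        required: false
        type: string

jobs:
  nightly:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # Fall back to the default ref when no input is provided
          ref: ${{ inputs.ref || github.ref }}
```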
11 changes: 11 additions & 0 deletions .github/workflows/bot-bump-sglang-version.yml
@@ -15,6 +15,8 @@ permissions:
jobs:
bump-sglang-version:
runs-on: ubuntu-latest
outputs:
branch_name: ${{ steps.set_output.outputs.branch_name }}
steps:
- name: Checkout code
uses: actions/checkout@v4
@@ -31,13 +33,15 @@ jobs:
pip install tomli

- name: Configure Git and branch
id: set_output
run: |
git config user.name "sglang-bot"
git config user.email "sglang-bot@users.noreply.github.com"
RANDOM_SUFFIX=$(echo $RANDOM | md5sum | head -c 4)
BRANCH_NAME="bot/bump-sglang-version-${{ github.event.inputs.new_version }}-${RANDOM_SUFFIX}"
git checkout -b "$BRANCH_NAME"
echo "BRANCH_NAME=$BRANCH_NAME" >> $GITHUB_ENV
echo "branch_name=$BRANCH_NAME" >> $GITHUB_OUTPUT

- name: Run SGLang version bump script
run: |
@@ -48,3 +52,10 @@
GH_TOKEN: ${{ secrets.GH_PAT_FOR_PULL_REQUEST }}
run: |
bash scripts/release/commit_and_pr.sh "SGLang" "${{ github.event.inputs.new_version }}" "$BRANCH_NAME"

run-nightly-tests:
needs: bump-sglang-version
uses: ./.github/workflows/nightly-test-nvidia.yml
with:
ref: ${{ needs.bump-sglang-version.outputs.branch_name }}
secrets: inherit
10 changes: 7 additions & 3 deletions .github/workflows/cancel-all-pending-pr-test-runs.yml
@@ -32,14 +32,18 @@ jobs:

for workflow_file in "${WORKFLOW_FILES[@]}"; do
echo "--- Checking workflow: $workflow_file ---"

# Fetch list and pipe to while loop
gh run list \
--repo "$REPO" \
--workflow "$workflow_file" \
--json databaseId,status \
--limit 1000 \
| jq -r '.[] | select(.status=="queued" or .status=="in_progress") | .databaseId' \
| jq -r '.[] | select(.status=="queued" or .status=="in_progress" or .status=="waiting") | .databaseId' \
| while read run_id; do
echo "Cancelling run ID: $run_id for workflow: $workflow_file"
gh run cancel "$run_id" --repo "$REPO"
echo "Attempting to cancel run ID: $run_id for workflow: $workflow_file"

# The "|| echo ..." part prevents the script from crashing if cancellation fails
gh run cancel "$run_id" --repo "$REPO" || echo "⚠️ Could not cancel run $run_id (it may have already completed). Continuing..."
done
done
4 changes: 2 additions & 2 deletions .github/workflows/ci-failure-monitor.yml
@@ -8,7 +8,7 @@ on:
limit:
description: 'Number of workflow runs to analyze (across all workflows)'
required: false
default: '800'
default: '1000'
type: string
threshold:
description: 'Alert threshold for consecutive failures'
@@ -51,7 +51,7 @@ jobs:
cd scripts/ci_monitor
python ci_failures_analysis.py \
--token $GITHUB_TOKEN \
--limit ${{ inputs.limit || '800' }} \
--limit ${{ inputs.limit || '1000' }} \
--threshold ${{ inputs.threshold || '4' }} \
--output ci_failure_analysis_$(date +%Y%m%d_%H%M%S).json

10 changes: 10 additions & 0 deletions .github/workflows/ci-monitor.yml
@@ -46,6 +46,15 @@ jobs:
cd scripts/ci_monitor
python ci_analyzer.py --token $GITHUB_TOKEN --limit ${{ inputs.limit || '1000' }} --output ci_analysis_$(date +%Y%m%d_%H%M%S).json

- name: Run Nightly Test Analysis
env:
GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
PYTHONUNBUFFERED: 1
PYTHONIOENCODING: utf-8
run: |
cd scripts/ci_monitor
python ci_analyzer.py --token $GITHUB_TOKEN --mode nightly --days 2 --output nightly_analysis_$(date +%Y%m%d_%H%M%S).json

- name: Run Performance Analysis
env:
GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
@@ -61,6 +70,7 @@
name: ci-analysis-results-${{ github.run_number }}
path: |
scripts/ci_monitor/ci_analysis_*.json
scripts/ci_monitor/nightly_analysis_*.json
scripts/ci_monitor/performance_tables_*
retention-days: 30

6 changes: 3 additions & 3 deletions .github/workflows/lint.yml
@@ -35,16 +35,16 @@ jobs:

- name: Check proto files are in sync
run: |
if ! diff -q python/sglang/srt/grpc/sglang_scheduler.proto sgl-router/src/proto/sglang_scheduler.proto; then
if ! diff -q python/sglang/srt/grpc/sglang_scheduler.proto sgl-model-gateway/src/proto/sglang_scheduler.proto; then
echo "❌ ERROR: Proto files are out of sync!"
echo ""
echo "The following files must be kept identical:"
echo " - python/sglang/srt/grpc/sglang_scheduler.proto"
echo " - sgl-router/src/proto/sglang_scheduler.proto"
echo " - sgl-model-gateway/src/proto/sglang_scheduler.proto"
echo ""
echo "Please ensure both files have the same content."
echo ""
echo "Differences:"
diff python/sglang/srt/grpc/sglang_scheduler.proto sgl-router/src/proto/sglang_scheduler.proto || true
diff python/sglang/srt/grpc/sglang_scheduler.proto sgl-model-gateway/src/proto/sglang_scheduler.proto || true
exit 1
fi
8 changes: 4 additions & 4 deletions .github/workflows/nightly-release-gateway.yml
@@ -50,9 +50,9 @@ jobs:
with:
path: sglang-repo

- name: Move sgl-router folder to root and delete sglang-repo
- name: Move sgl-model-gateway folder to root and delete sglang-repo
run: |
mv sglang-repo/sgl-router/* .
mv sglang-repo/sgl-model-gateway/* .
rm -rf sglang-repo
ls -alt
shell: bash
@@ -138,9 +138,9 @@ jobs:
with:
path: sglang-repo

- name: Move sgl-router folder to root and delete sglang-repo
- name: Move sgl-model-gateway folder to root and delete sglang-repo
run: |
mv sglang-repo/sgl-router/* .
mv sglang-repo/sgl-model-gateway/* .
rm -rf sglang-repo
ls -alt

2 changes: 1 addition & 1 deletion .github/workflows/nightly-test-amd.yml
@@ -19,7 +19,7 @@ jobs:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
strategy:
matrix:
runner: [linux-mi300-gpu-2, linux-mi325-gpu-2-nightly]
runner: [linux-mi325-gpu-2]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout code