sgl-project · mingfeima · May 12, 2026 · May 12, 2026
@@ -115,7 +115,7 @@ jobs:
         timeout-minutes: 36
         run: |
           docker exec -w /sglang-checkout/ ci_sglang_xeon \
-            bash -c "source /opt/.venv/bin/activate && cd ./test && python3 run_suite.py --hw cpu --suite stage-b-test-cpu"
+            bash -c "source /opt/.venv/bin/activate && cd ./test/srt && python3 run_suite.py --suite per-commit-cpu --timeout-per-file 1500"
 
       - name: Change permission
         timeout-minutes: 2

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -92,7 +92,6 @@ repos:
         entry: python3 scripts/ci/check_registered_tests.py
         language: system
         files: ^test/registered/.*\.py$
-        exclude: ^test/registered/.*/utils\.py$
         pass_filenames: false
       - id: check-no-docs-changes
         name: reject changes under legacy docs/

@@ -83,7 +83,7 @@ It is designed to deliver low-latency and high-throughput inference across a wid
     }}
   >
     <a
-      href="https://lmsys.org/blog/2026-04-29-p2p-update/"
+      href="https://lmsys.org/blog/2026-04-25-deepseek-v4/"
       target="_blank"
       rel="noopener noreferrer"
       style={{
@@ -104,8 +104,8 @@ It is designed to deliver low-latency and high-throughput inference across a wid
         }}
       >
         <img
-          src="https://lmsys.org/images/blog/p2p-update/p2p-overview.png"
-          alt="Updating 1T parameters in seconds \u2014 P2P weight transfer in Large Scale Distributed RL"
+          src="https://lmsys.org/images/blog/deepseek_v4/benchmark_vs_oss.png"
+          alt="DeepSeek-V4 on Day 0: From Fast Inference to Verified RL with SGLang and Miles"
           style={{
             width: "100%",
             height: "100%",
@@ -124,7 +124,7 @@ It is designed to deliver low-latency and high-throughput inference across a wid
             fontSize: "0.98rem",
           }}
         >
-          {"Updating 1T parameters in seconds \u2014 P2P weight transfer in Large Scale Distributed RL"}
+          {"DeepSeek-V4 on Day 0: From Fast Inference to Verified RL with SGLang and Miles"}
         </p>
         <p
           style={{
@@ -133,12 +133,12 @@ It is designed to deliver low-latency and high-throughput inference across a wid
             opacity: 0.75,
           }}
         >
-          {"April 29, 2026"}
+          {"April 25, 2026"}
         </p>
       </div>
     </a>
     <a
-      href="https://lmsys.org/blog/2026-04-25-deepseek-v4/"
+      href="https://lmsys.org/blog/2026-04-10-sglang-hisparse/"
       target="_blank"
       rel="noopener noreferrer"
       style={{
@@ -159,8 +159,8 @@ It is designed to deliver low-latency and high-throughput inference across a wid
         }}
       >
         <img
-          src="https://lmsys.org/images/blog/deepseek_v4/benchmark_vs_oss.png"
-          alt="DeepSeek-V4 on Day 0: From Fast Inference to Verified RL with SGLang and Miles"
+          src="https://lmsys.org/images/blog/hisparse/hisparse_overview.png"
+          alt="HiSparse: Turbocharging Sparse Attention with Hierarchical Memory"
           style={{
             width: "100%",
             height: "100%",
@@ -179,7 +179,7 @@ It is designed to deliver low-latency and high-throughput inference across a wid
             fontSize: "0.98rem",
           }}
         >
-          {"DeepSeek-V4 on Day 0: From Fast Inference to Verified RL with SGLang and Miles"}
+          {"HiSparse: Turbocharging Sparse Attention with Hierarchical Memory"}
         </p>
         <p
           style={{
@@ -188,12 +188,12 @@ It is designed to deliver low-latency and high-throughput inference across a wid
             opacity: 0.75,
           }}
         >
-          {"April 25, 2026"}
+          {"April 10, 2026"}
         </p>
       </div>
     </a>
     <a
-      href="https://lmsys.org/blog/2026-04-10-sglang-hisparse/"
+      href="https://lmsys.org/blog/2026-03-25-gtc2026/"
       target="_blank"
       rel="noopener noreferrer"
       style={{
@@ -214,8 +214,8 @@ It is designed to deliver low-latency and high-throughput inference across a wid
         }}
       >
         <img
-          src="https://lmsys.org/images/blog/hisparse/hisparse_overview.png"
-          alt="HiSparse: Turbocharging Sparse Attention with Hierarchical Memory"
+          src="https://lmsys.org/images/blog/gtc2026/happyhour-crowd.jpg"
+          alt="Highlights of SGLang at NVIDIA GTC 2026"
           style={{
             width: "100%",
             height: "100%",
@@ -234,7 +234,7 @@ It is designed to deliver low-latency and high-throughput inference across a wid
             fontSize: "0.98rem",
           }}
         >
-          {"HiSparse: Turbocharging Sparse Attention with Hierarchical Memory"}
+          {"Highlights of SGLang at NVIDIA GTC 2026"}
         </p>
         <p
           style={{
@@ -243,12 +243,12 @@ It is designed to deliver low-latency and high-throughput inference across a wid
             opacity: 0.75,
           }}
         >
-          {"April 10, 2026"}
+          {"March 31, 2026"}
         </p>
       </div>
     </a>
     <a
-      href="https://lmsys.org/blog/2026-03-25-gtc2026/"
+      href="https://lmsys.org/blog/2026-03-25-eep-partial-failure-tolerance/"
       target="_blank"
       rel="noopener noreferrer"
       style={{
@@ -269,8 +269,8 @@ It is designed to deliver low-latency and high-throughput inference across a wid
         }}
       >
         <img
-          src="https://lmsys.org/images/blog/gtc2026/happyhour-crowd.jpg"
-          alt="Highlights of SGLang at NVIDIA GTC 2026"
+          src="https://lmsys.org/images/blog/eep-partial-failure-tolerance/figure.png"
+          alt="Elastic EP in SGLang: Achieving Partial Failure Tolerance for DeepSeek MoE Deployments"
           style={{
             width: "100%",
             height: "100%",
@@ -289,7 +289,7 @@ It is designed to deliver low-latency and high-throughput inference across a wid
             fontSize: "0.98rem",
           }}
         >
-          {"Highlights of SGLang at NVIDIA GTC 2026"}
+          {"Elastic EP in SGLang: Achieving Partial Failure Tolerance for DeepSeek MoE Deployments"}
         </p>
         <p
           style={{
@@ -298,12 +298,12 @@ It is designed to deliver low-latency and high-throughput inference across a wid
             opacity: 0.75,
           }}
         >
-          {"March 31, 2026"}
+          {"March 25, 2026"}
         </p>
       </div>
     </a>
     <a
-      href="https://lmsys.org/blog/2026-03-25-eep-partial-failure-tolerance/"
+      href="https://lmsys.org/blog/2026-03-17-rocm-miles-rl-amd/"
       target="_blank"
       rel="noopener noreferrer"
       style={{
@@ -324,8 +324,8 @@ It is designed to deliver low-latency and high-throughput inference across a wid
         }}
       >
         <img
-          src="https://lmsys.org/images/blog/eep-partial-failure-tolerance/figure.png"
-          alt="Elastic EP in SGLang: Achieving Partial Failure Tolerance for DeepSeek MoE Deployments"
+          src="https://lmsys.org/images/blog/rocm_miles_rl/fig_1.png"
+          alt="ROCm Support for Miles: Large-Scale RL Post-Training on AMD Instinct\u2122 GPUs"
           style={{
             width: "100%",
             height: "100%",
@@ -344,7 +344,7 @@ It is designed to deliver low-latency and high-throughput inference across a wid
             fontSize: "0.98rem",
           }}
         >
-          {"Elastic EP in SGLang: Achieving Partial Failure Tolerance for DeepSeek MoE Deployments"}
+          {"ROCm Support for Miles: Large-Scale RL Post-Training on AMD Instinct\u2122 GPUs"}
         </p>
         <p
           style={{
@@ -353,12 +353,12 @@ It is designed to deliver low-latency and high-throughput inference across a wid
             opacity: 0.75,
           }}
         >
-          {"March 25, 2026"}
+          {"March 17, 2026"}
         </p>
       </div>
     </a>
     <a
-      href="https://lmsys.org/blog/2026-03-17-rocm-miles-rl-amd/"
+      href="https://lmsys.org/blog/2026-03-11-run-nvidia-nemotron-3-super/"
       target="_blank"
       rel="noopener noreferrer"
       style={{
@@ -379,8 +379,8 @@ It is designed to deliver low-latency and high-throughput inference across a wid
         }}
       >
         <img
-          src="https://lmsys.org/images/blog/rocm_miles_rl/fig_1.png"
-          alt="ROCm Support for Miles: Large-Scale RL Post-Training on AMD Instinct\u2122 GPUs"
+          src="https://lmsys.org/images/blog/nemotron-3-super/figure_1.svg"
+          alt="SGLang Adds Day-0 Support for NVIDIA Nemotron 3 Super for building High-Efficiency Multi-Agent Systems"
           style={{
             width: "100%",
             height: "100%",
@@ -399,7 +399,7 @@ It is designed to deliver low-latency and high-throughput inference across a wid
             fontSize: "0.98rem",
           }}
         >
-          {"ROCm Support for Miles: Large-Scale RL Post-Training on AMD Instinct\u2122 GPUs"}
+          {"SGLang Adds Day-0 Support for NVIDIA Nemotron 3 Super for building High-Efficiency Multi-Agent Systems"}
         </p>
         <p
           style={{
@@ -408,7 +408,7 @@ It is designed to deliver low-latency and high-throughput inference across a wid
             opacity: 0.75,
           }}
         >
-          {"March 17, 2026"}
+          {"March 11, 2026"}
         </p>
       </div>
     </a>

diff --git a/scripts/ci/check_registered_tests.py b/scripts/ci/check_registered_tests.py
@@ -22,11 +22,11 @@ def main() -> int:
     ci_register = importlib.util.module_from_spec(spec)
     spec.loader.exec_module(ci_register)
 
-    # Same filter as run_suite.py: skip conftest.py, __init__.py, and utils.py
+    # Same filter as run_suite.py: skip conftest.py and __init__.py
     files = sorted(
         f
         for f in glob.glob("test/registered/**/*.py", recursive=True)
-        if os.path.basename(f) not in ("conftest.py", "__init__.py", "utils.py")
+        if os.path.basename(f) not in ("conftest.py", "__init__.py")
     )
     if not files:
         return 0

diff --git a/test/registered/cpu/test_activation.py b/test/registered/cpu/test_activation.py
diff --git a/test/registered/cpu/test_binding.py b/test/registered/cpu/test_binding.py