Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/pr-test-amd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ jobs:

# =============================================== primary ====================================================

unit-test-frontend-amd:
stage-a-test-1-amd:
needs: [check-changes]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
Expand Down Expand Up @@ -126,10 +126,10 @@ jobs:
- name: Run test
timeout-minutes: 10
run: |
docker exec -w /sglang-checkout/test/lang ci_sglang python3 run_suite.py --suite per-commit
docker exec -w /sglang-checkout/test ci_sglang python3 run_suite.py

unit-test-backend-1-gpu-amd:
needs: [check-changes, unit-test-frontend-amd]
needs: [check-changes, stage-a-test-1-amd]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
strategy:
Expand Down Expand Up @@ -420,7 +420,7 @@ jobs:

sgl-kernel-unit-test-amd,

unit-test-frontend-amd,
stage-a-test-1-amd,
unit-test-backend-1-gpu-amd,
unit-test-backend-2-gpu-amd,
unit-test-backend-8-gpu-amd,
Expand Down
18 changes: 9 additions & 9 deletions .github/workflows/pr-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ jobs:

# =============================================== primary ====================================================

unit-test-frontend:
stage-a-test-1:
needs: [check-changes, sgl-kernel-build-wheels]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
Expand All @@ -391,11 +391,11 @@ jobs:
- name: Run test
timeout-minutes: 10
run: |
cd test/lang
python3 run_suite.py --suite per-commit
cd test/
python3 run_suite.py

unit-test-backend-1-gpu:
needs: [check-changes, unit-test-frontend, sgl-kernel-build-wheels]
needs: [check-changes, stage-a-test-1, sgl-kernel-build-wheels]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
runs-on: 1-gpu-runner
Expand Down Expand Up @@ -562,7 +562,7 @@ jobs:
python3 run_suite.py --suite per-commit-8-gpu-h20 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2

performance-test-1-gpu-part-1:
needs: [check-changes, sgl-kernel-build-wheels]
needs: [check-changes, sgl-kernel-build-wheels, stage-a-test-1]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
runs-on: 1-gpu-runner
Expand Down Expand Up @@ -623,7 +623,7 @@ jobs:
python3 -m unittest test_bench_serving.TestBenchServing.test_lora_online_latency_with_concurrent_adapter_updates

performance-test-1-gpu-part-2:
needs: [check-changes, sgl-kernel-build-wheels]
needs: [check-changes, sgl-kernel-build-wheels, stage-a-test-1]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
runs-on: 1-gpu-runner
Expand Down Expand Up @@ -676,7 +676,7 @@ jobs:
python3 -m unittest test_bench_serving.TestBenchServing.test_vlm_online_latency

performance-test-1-gpu-part-3:
needs: [check-changes, sgl-kernel-build-wheels]
needs: [check-changes, sgl-kernel-build-wheels, stage-a-test-1]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
runs-on: 1-gpu-runner
Expand Down Expand Up @@ -770,7 +770,7 @@ jobs:
python3 -m unittest test_bench_serving.TestBenchServing.test_pp_long_context_prefill

accuracy-test-1-gpu:
needs: [check-changes, sgl-kernel-build-wheels]
needs: [check-changes, sgl-kernel-build-wheels, stage-a-test-1]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
runs-on: 1-gpu-runner
Expand Down Expand Up @@ -965,7 +965,7 @@ jobs:

multimodal-gen-test,

unit-test-frontend,
stage-a-test-1,
unit-test-backend-1-gpu,
unit-test-backend-2-gpu,
unit-test-backend-4-gpu,
Expand Down
107 changes: 107 additions & 0 deletions python/sglang/test/ci/ci_register.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import ast
import warnings
from dataclasses import dataclass
from enum import Enum, auto
from typing import List


class HWBackend(Enum):
    """Hardware backend a CI test is registered against."""

    CUDA = auto()
    AMD = auto()


@dataclass
class CIRegistry:
    """One CI test registration extracted statically from a test file."""

    # Hardware backend the registration targets.
    backend: HWBackend
    # Source file that declared the registration.
    filename: str
    # Declared runtime estimate of the test (presumably seconds — matches
    # the `estimated_time` convention in ci_utils; confirm with callers).
    estimation_time: float
    # Name of the CI stage/job this test belongs to.
    stage: str


def register_cuda_ci(estimation_time: float, ci_stage: str):
    """Marker declaring a CUDA CI test; a no-op at runtime.

    The arguments are never used here — they are read statically from the
    AST by RegistryVisitor, which looks this function up by name.
    """


def register_amd_ci(estimation_time: float, ci_stage: str):
    """Marker declaring an AMD CI test; a no-op at runtime.

    The arguments are never used here — they are read statically from the
    AST by RegistryVisitor, which looks this function up by name.
    """


# Maps the marker-function names recognized by RegistryVisitor to the
# hardware backend they register the test for.
REGISTER_MAPPING = {
    "register_cuda_ci": HWBackend.CUDA,
    "register_amd_ci": HWBackend.AMD,
}


class RegistryVisitor(ast.NodeVisitor):
    """Collect CI registrations declared via top-level ``register_*_ci`` calls.

    Walks a module's AST and records every module-level expression statement
    that is a direct call to one of the marker functions in REGISTER_MAPPING.
    Calls nested inside functions or classes are ignored.
    """

    def __init__(self, filename: str):
        # File the collected registrations are attributed to.
        self.filename = filename
        # All registrations found; filled in by visit_Module.
        self.registries: List[CIRegistry] = []

    def _collect_ci_registry(self, func_call: ast.Call):
        """Return a CIRegistry for a register_*_ci call, or None otherwise.

        Arguments may be passed positionally or by keyword, but must be
        literal constants so they can be read without executing the file.

        Raises:
            AssertionError: If either argument is missing or not a constant.
        """
        # Only plain-name calls (e.g. register_cuda_ci(...)) are recognized;
        # attribute calls such as mod.register_cuda_ci(...) are skipped.
        if not isinstance(func_call.func, ast.Name):
            return None

        if func_call.func.id not in REGISTER_MAPPING:
            return None

        hw = REGISTER_MAPPING[func_call.func.id]
        est_time = None
        ci_stage = None
        for kw in func_call.keywords:
            if kw.arg == "estimation_time":
                if isinstance(kw.value, ast.Constant):
                    est_time = kw.value.value
            elif kw.arg == "ci_stage":
                if isinstance(kw.value, ast.Constant):
                    ci_stage = kw.value.value

        # Positional form: (estimation_time, ci_stage).
        for i, arg in enumerate(func_call.args):
            if isinstance(arg, ast.Constant):
                if i == 0:
                    est_time = arg.value
                elif i == 1:
                    ci_stage = arg.value
        assert (
            est_time is not None
        ), "estimation_time is required and should be a constant"
        assert ci_stage is not None, "ci_stage is required and should be a constant"
        return CIRegistry(
            backend=hw, filename=self.filename, estimation_time=est_time, stage=ci_stage
        )

    def visit_Module(self, node):
        """Scan only the direct children of the module body for registrations."""
        for stmt in node.body:
            # A registration is a bare expression statement wrapping a call.
            if not isinstance(stmt, ast.Expr) or not isinstance(stmt.value, ast.Call):
                continue

            cr = self._collect_ci_registry(stmt.value)
            if cr is not None:
                self.registries.append(cr)

        self.generic_visit(node)


def ut_parse_one_file(filename: str) -> List[CIRegistry]:
    """Parse a single test file and return the CI registrations it declares."""
    with open(filename, "r") as source:
        module = ast.parse(source.read(), filename=filename)

    visitor = RegistryVisitor(filename=filename)
    visitor.visit(module)
    return visitor.registries


def collect_tests(files: List[str], sanity_check: bool = True) -> List[CIRegistry]:
    """Collect CI registrations from a list of test files.

    Args:
        files: Paths of the test files to scan.
        sanity_check: If True (default), a file containing no registration is
            an error; otherwise it only triggers a warning and is skipped.

    Returns:
        All registrations found, in file order.

    Raises:
        ValueError: If ``sanity_check`` is set and a file registers nothing.
    """
    ci_tests: List[CIRegistry] = []
    for file in files:
        registries = ut_parse_one_file(file)
        if len(registries) == 0:
            msg = f"No CI registry found in {file}"
            if sanity_check:
                raise ValueError(msg)
            else:
                warnings.warn(msg)
            continue

        ci_tests.extend(registries)

    return ci_tests
134 changes: 134 additions & 0 deletions python/sglang/test/ci/ci_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import os
import subprocess
import threading
import time
from dataclasses import dataclass
from typing import Callable, List, Optional

from sglang.srt.utils.common import kill_process_tree


@dataclass
class TestFile:
    """A test file to execute, with a rough runtime estimate."""

    # Path of the test file (absolute, or relative to the current directory).
    name: str
    # Rough expected runtime in seconds; only echoed in the end-of-run banner.
    estimated_time: float = 60


def run_with_timeout(
    func: Callable,
    args: tuple = (),
    kwargs: Optional[dict] = None,
    timeout: Optional[float] = None,
):
    """Run ``func(*args, **kwargs)`` in a worker thread with a timeout.

    Args:
        func: Callable to execute.
        args: Positional arguments for ``func``.
        kwargs: Keyword arguments for ``func`` (None means no kwargs).
        timeout: Seconds to wait; None waits forever.

    Returns:
        The value returned by ``func``.

    Raises:
        TimeoutError: If ``func`` does not finish within ``timeout`` seconds.
        RuntimeError: If ``func`` itself raised (the original exception is
            attached as the cause) or produced no result.
    """
    ret_value = []
    errors = []

    def _target_func():
        try:
            ret_value.append(func(*args, **(kwargs or {})))
        except BaseException as exc:  # surface worker failures to the caller
            errors.append(exc)

    # Daemon thread so a timed-out worker cannot keep the process alive
    # after the main thread exits.
    t = threading.Thread(target=_target_func, daemon=True)
    t.start()
    t.join(timeout=timeout)
    if t.is_alive():
        raise TimeoutError(f"did not finish within {timeout} seconds")

    if errors:
        raise RuntimeError("wrapped function raised an exception") from errors[0]
    if not ret_value:
        raise RuntimeError("wrapped function produced no result")

    return ret_value[0]


def run_unittest_files(
    files: List[TestFile], timeout_per_file: float, continue_on_error: bool = False
):
    """
    Run a list of test files, one subprocess per file.

    Args:
        files: List of TestFile objects to run
        timeout_per_file: Timeout in seconds for each test file
        continue_on_error: If True, continue running remaining tests even if one fails.
            If False, stop at first failure (default behavior for PR tests).

    Returns:
        0 if every file exited with code 0, -1 otherwise.
    """
    tic = time.perf_counter()
    success = True
    passed_tests = []
    failed_tests = []

    for i, file in enumerate(files):
        filename, estimated_time = file.name, file.estimated_time
        process = None

        def run_one_file(filename):
            # Expose the Popen handle so the timeout handler can kill it.
            nonlocal process

            filename = os.path.join(os.getcwd(), filename)
            print(
                f".\n.\nBegin ({i}/{len(files) - 1}):\npython3 {filename}\n.\n.\n",
                flush=True,
            )
            tic = time.perf_counter()

            # Inherit stdout/stderr so the test's output streams live.
            process = subprocess.Popen(
                ["python3", filename], stdout=None, stderr=None, env=os.environ
            )
            process.wait()
            elapsed = time.perf_counter() - tic

            print(
                f".\n.\nEnd ({i}/{len(files) - 1}):\n{filename=}, {elapsed=:.0f}, {estimated_time=}\n.\n.\n",
                flush=True,
            )
            return process.returncode

        try:
            ret_code = run_with_timeout(
                run_one_file, args=(filename,), timeout=timeout_per_file
            )
            if ret_code != 0:
                print(
                    f"\n✗ FAILED: {filename} returned exit code {ret_code}\n",
                    flush=True,
                )
                success = False
                failed_tests.append((filename, f"exit code {ret_code}"))
                if not continue_on_error:
                    # Stop at first failure for PR tests
                    break
                # Otherwise continue to next test for nightly tests
            else:
                passed_tests.append(filename)
        except TimeoutError:
            # The timeout may fire before Popen was even assigned.
            if process is not None:
                kill_process_tree(process.pid)
                # Give the process tree a moment to die before moving on.
                time.sleep(5)
            print(
                f"\n✗ TIMEOUT: {filename} after {timeout_per_file} seconds\n",
                flush=True,
            )
            success = False
            failed_tests.append((filename, f"timeout after {timeout_per_file}s"))
            if not continue_on_error:
                # Stop at first timeout for PR tests
                break
            # Otherwise continue to next test for nightly tests

    if success:
        print(f"Success. Time elapsed: {time.perf_counter() - tic:.2f}s", flush=True)
    else:
        print(f"Fail. Time elapsed: {time.perf_counter() - tic:.2f}s", flush=True)

    # Print summary
    print(f"\n{'='*60}", flush=True)
    print(f"Test Summary: {len(passed_tests)}/{len(files)} passed", flush=True)
    print(f"{'='*60}", flush=True)
    if passed_tests:
        print("✓ PASSED:", flush=True)
        for test in passed_tests:
            print(f"  {test}", flush=True)
    if failed_tests:
        print("\n✗ FAILED:", flush=True)
        for test, reason in failed_tests:
            print(f"  {test} ({reason})", flush=True)
    print(f"{'='*60}\n", flush=True)

    return 0 if success else -1
Loading
Loading