Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/pr-test-amd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ jobs:

# =============================================== primary ====================================================

unit-test-frontend-amd:
stage-a-test-1-amd:
needs: [check-changes]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
Expand Down Expand Up @@ -126,10 +126,10 @@ jobs:
- name: Run test
timeout-minutes: 10
run: |
docker exec -w /sglang-checkout/test/lang ci_sglang python3 run_suite.py --suite per-commit
docker exec -w /sglang-checkout/test ci_sglang python3 run_suite.py

unit-test-backend-1-gpu-amd:
needs: [check-changes, unit-test-frontend-amd]
needs: [check-changes, stage-a-test-1-amd]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
strategy:
Expand Down Expand Up @@ -420,7 +420,7 @@ jobs:

sgl-kernel-unit-test-amd,

unit-test-frontend-amd,
stage-a-test-1-amd,
unit-test-backend-1-gpu-amd,
unit-test-backend-2-gpu-amd,
unit-test-backend-8-gpu-amd,
Expand Down
18 changes: 9 additions & 9 deletions .github/workflows/pr-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ jobs:

# =============================================== primary ====================================================

unit-test-frontend:
stage-a-test-1:
needs: [check-changes, sgl-kernel-build-wheels]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
Expand All @@ -391,11 +391,11 @@ jobs:
- name: Run test
timeout-minutes: 10
run: |
cd test/lang
python3 run_suite.py --suite per-commit
cd test/
python3 run_suite.py

unit-test-backend-1-gpu:
needs: [check-changes, unit-test-frontend, sgl-kernel-build-wheels]
needs: [check-changes, stage-a-test-1, sgl-kernel-build-wheels]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
runs-on: 1-gpu-runner
Expand Down Expand Up @@ -562,7 +562,7 @@ jobs:
python3 run_suite.py --suite per-commit-8-gpu-h20 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2

performance-test-1-gpu-part-1:
needs: [check-changes, sgl-kernel-build-wheels]
needs: [check-changes, sgl-kernel-build-wheels, stage-a-test-1]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
runs-on: 1-gpu-runner
Expand Down Expand Up @@ -623,7 +623,7 @@ jobs:
python3 -m unittest test_bench_serving.TestBenchServing.test_lora_online_latency_with_concurrent_adapter_updates

performance-test-1-gpu-part-2:
needs: [check-changes, sgl-kernel-build-wheels]
needs: [check-changes, sgl-kernel-build-wheels, stage-a-test-1]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
runs-on: 1-gpu-runner
Expand Down Expand Up @@ -676,7 +676,7 @@ jobs:
python3 -m unittest test_bench_serving.TestBenchServing.test_vlm_online_latency

performance-test-1-gpu-part-3:
needs: [check-changes, sgl-kernel-build-wheels]
needs: [check-changes, sgl-kernel-build-wheels, stage-a-test-1]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
runs-on: 1-gpu-runner
Expand Down Expand Up @@ -770,7 +770,7 @@ jobs:
python3 -m unittest test_bench_serving.TestBenchServing.test_pp_long_context_prefill

accuracy-test-1-gpu:
needs: [check-changes, sgl-kernel-build-wheels]
needs: [check-changes, sgl-kernel-build-wheels, stage-a-test-1]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
runs-on: 1-gpu-runner
Expand Down Expand Up @@ -965,7 +965,7 @@ jobs:

multimodal-gen-test,

unit-test-frontend,
stage-a-test-1,
unit-test-backend-1-gpu,
unit-test-backend-2-gpu,
unit-test-backend-4-gpu,
Expand Down
107 changes: 107 additions & 0 deletions python/sglang/test/ci/ci_register.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import ast
import warnings
from dataclasses import dataclass
from enum import Enum, auto
from typing import List


class HWBackend(Enum):
    """Hardware backend a CI test is registered against."""

    CUDA = auto()
    AMD = auto()


@dataclass
class CIRegistry:
    """One CI test registration extracted statically from a test file."""

    # Hardware backend the registration targets.
    backend: HWBackend
    # Source file that declared the registration.
    filename: str
    # Declared runtime estimate of the test (presumably seconds — matches
    # the `estimated_time` convention in ci_utils; confirm with callers).
    estimation_time: float
    # Name of the CI stage/job this test belongs to.
    stage: str


def register_cuda_ci(estimation_time: float, ci_stage: str):
    """Marker declaring a CUDA CI test; a no-op at runtime.

    The arguments are never used here — they are read statically from the
    AST by RegistryVisitor, which looks this function up by name.
    """


def register_amd_ci(estimation_time: float, ci_stage: str):
    """Marker declaring an AMD CI test; a no-op at runtime.

    The arguments are never used here — they are read statically from the
    AST by RegistryVisitor, which looks this function up by name.
    """


# Maps the marker-function names recognized by RegistryVisitor to the
# hardware backend they register the test for.
REGISTER_MAPPING = {
    "register_cuda_ci": HWBackend.CUDA,
    "register_amd_ci": HWBackend.AMD,
}


class RegistryVisitor(ast.NodeVisitor):
    """Collect CI registrations declared via top-level ``register_*_ci`` calls.

    Walks a module's AST and records every module-level expression statement
    that is a direct call to one of the marker functions in REGISTER_MAPPING.
    Calls nested inside functions or classes are ignored.
    """

    def __init__(self, filename: str):
        # File the collected registrations are attributed to.
        self.filename = filename
        # All registrations found; filled in by visit_Module.
        self.registries: List[CIRegistry] = []

    def _collect_ci_registry(self, func_call: ast.Call):
        """Return a CIRegistry for a register_*_ci call, or None otherwise.

        Arguments may be passed positionally or by keyword, but must be
        literal constants so they can be read without executing the file.

        Raises:
            AssertionError: If either argument is missing or not a constant.
        """
        # Only plain-name calls (e.g. register_cuda_ci(...)) are recognized;
        # attribute calls such as mod.register_cuda_ci(...) are skipped.
        if not isinstance(func_call.func, ast.Name):
            return None

        if func_call.func.id not in REGISTER_MAPPING:
            return None

        hw = REGISTER_MAPPING[func_call.func.id]
        est_time = None
        ci_stage = None
        for kw in func_call.keywords:
            if kw.arg == "estimation_time":
                if isinstance(kw.value, ast.Constant):
                    est_time = kw.value.value
            elif kw.arg == "ci_stage":
                if isinstance(kw.value, ast.Constant):
                    ci_stage = kw.value.value

        # Positional form: (estimation_time, ci_stage).
        for i, arg in enumerate(func_call.args):
            if isinstance(arg, ast.Constant):
                if i == 0:
                    est_time = arg.value
                elif i == 1:
                    ci_stage = arg.value
        assert (
            est_time is not None
        ), "estimation_time is required and should be a constant"
        assert ci_stage is not None, "ci_stage is required and should be a constant"
        return CIRegistry(
            backend=hw, filename=self.filename, estimation_time=est_time, stage=ci_stage
        )

    def visit_Module(self, node):
        """Scan only the direct children of the module body for registrations."""
        for stmt in node.body:
            # A registration is a bare expression statement wrapping a call.
            if not isinstance(stmt, ast.Expr) or not isinstance(stmt.value, ast.Call):
                continue

            cr = self._collect_ci_registry(stmt.value)
            if cr is not None:
                self.registries.append(cr)

        self.generic_visit(node)


def ut_parse_one_file(filename: str) -> List[CIRegistry]:
    """Parse a single test file and return the CI registrations it declares."""
    with open(filename, "r") as source:
        module = ast.parse(source.read(), filename=filename)

    visitor = RegistryVisitor(filename=filename)
    visitor.visit(module)
    return visitor.registries


def collect_tests(files: List[str], sanity_check: bool = True) -> List[CIRegistry]:
    """Collect CI registrations from a list of test files.

    Args:
        files: Paths of the test files to scan.
        sanity_check: If True (default), a file containing no registration is
            an error; otherwise it only triggers a warning and is skipped.

    Returns:
        All registrations found, in file order.

    Raises:
        ValueError: If ``sanity_check`` is set and a file registers nothing.
    """
    ci_tests: List[CIRegistry] = []
    for file in files:
        registries = ut_parse_one_file(file)
        if len(registries) == 0:
            msg = f"No CI registry found in {file}"
            if sanity_check:
                raise ValueError(msg)
            else:
                warnings.warn(msg)
            continue

        ci_tests.extend(registries)

    return ci_tests
134 changes: 134 additions & 0 deletions python/sglang/test/ci/ci_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import os
import subprocess
import threading
import time
from dataclasses import dataclass
from typing import Callable, List, Optional

from sglang.srt.utils.common import kill_process_tree


@dataclass
class TestFile:
    """A test file to execute, with a rough runtime estimate."""

    # Path of the test file (absolute, or relative to the current directory).
    name: str
    # Rough expected runtime in seconds; only echoed in the end-of-run banner.
    estimated_time: float = 60


def run_with_timeout(
    func: Callable,
    args: tuple = (),
    kwargs: Optional[dict] = None,
    timeout: Optional[float] = None,
):
    """Run ``func(*args, **kwargs)`` in a worker thread with a timeout.

    Args:
        func: Callable to execute.
        args: Positional arguments for ``func``.
        kwargs: Keyword arguments for ``func`` (None means no kwargs).
        timeout: Seconds to wait; None waits forever.

    Returns:
        The value returned by ``func``.

    Raises:
        TimeoutError: If ``func`` does not finish within ``timeout`` seconds.
        RuntimeError: If ``func`` itself raised (the original exception is
            attached as the cause) or produced no result.
    """
    ret_value = []
    errors = []

    def _target_func():
        try:
            ret_value.append(func(*args, **(kwargs or {})))
        except BaseException as exc:  # surface worker failures to the caller
            errors.append(exc)

    # Daemon thread so a timed-out worker cannot keep the process alive
    # after the main thread exits.
    t = threading.Thread(target=_target_func, daemon=True)
    t.start()
    t.join(timeout=timeout)
    if t.is_alive():
        raise TimeoutError(f"did not finish within {timeout} seconds")

    if errors:
        raise RuntimeError("wrapped function raised an exception") from errors[0]
    if not ret_value:
        raise RuntimeError("wrapped function produced no result")

    return ret_value[0]


def run_unittest_files(
    files: List[TestFile], timeout_per_file: float, continue_on_error: bool = False
):
    """
    Run a list of test files, one subprocess per file.

    Args:
        files: List of TestFile objects to run
        timeout_per_file: Timeout in seconds for each test file
        continue_on_error: If True, continue running remaining tests even if one fails.
            If False, stop at first failure (default behavior for PR tests).

    Returns:
        0 if every file exited with code 0, -1 otherwise.
    """
    tic = time.perf_counter()
    success = True
    passed_tests = []
    failed_tests = []

    for i, file in enumerate(files):
        filename, estimated_time = file.name, file.estimated_time
        process = None

        def run_one_file(filename):
            # Expose the Popen handle so the timeout handler can kill it.
            nonlocal process

            filename = os.path.join(os.getcwd(), filename)
            print(
                f".\n.\nBegin ({i}/{len(files) - 1}):\npython3 {filename}\n.\n.\n",
                flush=True,
            )
            tic = time.perf_counter()

            # Inherit stdout/stderr so the test's output streams live.
            process = subprocess.Popen(
                ["python3", filename], stdout=None, stderr=None, env=os.environ
            )
            process.wait()
            elapsed = time.perf_counter() - tic

            print(
                f".\n.\nEnd ({i}/{len(files) - 1}):\n{filename=}, {elapsed=:.0f}, {estimated_time=}\n.\n.\n",
                flush=True,
            )
            return process.returncode

        try:
            ret_code = run_with_timeout(
                run_one_file, args=(filename,), timeout=timeout_per_file
            )
            if ret_code != 0:
                print(
                    f"\n✗ FAILED: {filename} returned exit code {ret_code}\n",
                    flush=True,
                )
                success = False
                failed_tests.append((filename, f"exit code {ret_code}"))
                if not continue_on_error:
                    # Stop at first failure for PR tests
                    break
                # Otherwise continue to next test for nightly tests
            else:
                passed_tests.append(filename)
        except TimeoutError:
            # The timeout may fire before Popen was even assigned.
            if process is not None:
                kill_process_tree(process.pid)
                # Give the process tree a moment to die before moving on.
                time.sleep(5)
            print(
                f"\n✗ TIMEOUT: {filename} after {timeout_per_file} seconds\n",
                flush=True,
            )
            success = False
            failed_tests.append((filename, f"timeout after {timeout_per_file}s"))
            if not continue_on_error:
                # Stop at first timeout for PR tests
                break
            # Otherwise continue to next test for nightly tests

    if success:
        print(f"Success. Time elapsed: {time.perf_counter() - tic:.2f}s", flush=True)
    else:
        print(f"Fail. Time elapsed: {time.perf_counter() - tic:.2f}s", flush=True)

    # Print summary
    print(f"\n{'='*60}", flush=True)
    print(f"Test Summary: {len(passed_tests)}/{len(files)} passed", flush=True)
    print(f"{'='*60}", flush=True)
    if passed_tests:
        print("✓ PASSED:", flush=True)
        for test in passed_tests:
            print(f"  {test}", flush=True)
    if failed_tests:
        print("\n✗ FAILED:", flush=True)
        for test, reason in failed_tests:
            print(f"  {test} ({reason})", flush=True)
    print(f"{'='*60}\n", flush=True)

    return 0 if success else -1
Loading
Loading