Merged

Changes from all commits (42 commits)
e04c2d9
Create Dockerfil.xeon
DiweiSun Apr 9, 2025
0b05058
Rename Dockerfil.xeon to Dockerfile.xeon
DiweiSun Apr 9, 2025
96b7fee
enable device auto detection for cuda/cpu/intel-gpu/rocm
DiweiSun May 12, 2025
a6328b3
Delete docker/Dockerfile.xeon
DiweiSun May 12, 2025
f405aac
lint format fix
DiweiSun May 13, 2025
47f46e3
Merge branch 'main' into molly/cpu_ut
DiweiSun May 13, 2025
c397a5f
add Dockerfile for Xeon
ZailiWang May 20, 2025
6db5e06
Update run_suite.py
DiweiSun May 20, 2025
16e3f07
Merge branch 'sgl-project:main' into main
ZailiWang May 21, 2025
9c8b3b8
add autotask yml file
ZailiWang May 21, 2025
7bde01a
Merge branch 'main' into main
ZailiWang May 21, 2025
0856e0c
Merge branch 'main' into main
ZailiWang May 22, 2025
3f9c509
Merge branch 'main' into main
ZailiWang May 23, 2025
850e128
Merge branch 'main' into main
ZailiWang May 23, 2025
e97287b
replace setup.py
ZailiWang May 23, 2025
ef631d2
Merge branch 'main' into main
ZailiWang May 23, 2025
c00ace3
fix dep. issue in test_topk
ZailiWang May 26, 2025
f083009
Merge branch 'main' into molly/cpu_ut
DiweiSun May 26, 2025
6f3106b
Merge pull request #5 from ZailiWang/main
DiweiSun May 26, 2025
660b9c5
install torch_cpu separately since --index-url cannot be set in toml
ZailiWang May 26, 2025
f02d1d6
Merge branch 'main' into main
ZailiWang May 26, 2025
e785e76
enable cpu ci
DiweiSun May 27, 2025
39b907d
docker build with local hardware
DiweiSun May 27, 2025
00ef105
docker build with local hardware
DiweiSun May 27, 2025
2693254
bugfix for workspace
DiweiSun May 27, 2025
996246a
bugfix for container clean
DiweiSun May 27, 2025
3539a0d
remove test_rope.py in cpu ci
DiweiSun May 27, 2025
6652226
remove proxy setting and lint format fix
DiweiSun May 27, 2025
e44e1c6
Merge branch 'main' into molly/cpu_ut
DiweiSun May 27, 2025
e71802e
UT TEST: raise error when server launch process dead
DiweiSun May 27, 2025
3f77c78
enable ci on xeon
DiweiSun May 27, 2025
0d02e79
skip CPU test if AMX not supported
ZailiWang May 27, 2025
cd7f3f7
Merge branch 'main' into main
ZailiWang May 27, 2025
232b5c7
fix lint error
ZailiWang May 28, 2025
2bace09
Merge branch 'main' of https://github.com/ZailiWang/sglang
ZailiWang May 28, 2025
aedeae2
Merge branch 'main' into main
zhyncs May 28, 2025
5c79c04
Merge branch 'main' into main
zhyncs May 30, 2025
9b0087b
Merge branch 'main' into main
zhyncs Jun 4, 2025
a3c804b
Merge branch 'main' into main
zhyncs Jun 5, 2025
fd13d4b
upd
zhyncs Jun 5, 2025
9ee7a2c
upd
zhyncs Jun 5, 2025
1cf96d7
upd
zhyncs Jun 5, 2025
86 changes: 86 additions & 0 deletions .github/workflows/pr-test-xeon.yml
@@ -0,0 +1,86 @@
name: PR Test (Xeon)
on:
  pull_request:
    branches:
      - main
  workflow_dispatch:

concurrency:
  group: pr-test-xeon-${{ github.ref }}
  cancel-in-progress: true

jobs:
  build-test:
    if: github.event_name == 'pull_request'
    runs-on: sgl-kernel-build-node
    environment: 'prod'
    strategy:
      matrix:
        build_type: ['all']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Build and Push
        run: |
          version=$(cat python/sglang/version.py | cut -d'"' -f2)
          tag=v${version}-xeon

          docker build . -f docker/Dockerfile.xeon -t sglang_xeon --no-cache

  unit-test:
    if: github.event_name == 'pull_request'
    needs: [build-test]
    runs-on: sgl-kernel-build-node
    steps:
      - name: Run container
        run: |
          docker run -dt \
            -v ${{ github.workspace }}:/sglang-checkout/ --ipc=host \
            --name ci_sglang_xeon \
            sglang_xeon

      - name: Install Dependency
        timeout-minutes: 20
        run: |
          docker exec ci_sglang_xeon bash -c "python3 -m pip install --upgrade pip"
          docker exec ci_sglang_xeon pip uninstall sgl-kernel -y || true
          docker exec -w /sglang-checkout/sgl-kernel ci_sglang_xeon bash -c "cp pyproject_cpu.toml pyproject.toml && pip install -v ."
          docker exec -w /sglang-checkout/ ci_sglang_xeon bash -c "pip install -e 'python[all_cpu]'"
          docker exec ci_sglang_xeon bash -c "python3 -m pip install pytest expecttest"

      - name: Check AMX Support
        id: check_amx
        timeout-minutes: 5
        run: |
          docker exec -w /sglang-checkout/ ci_sglang_xeon \
            bash -c "python3 -c 'import torch; import sgl_kernel; assert torch._C._cpu._is_amx_tile_supported(); assert hasattr(torch.ops.sgl_kernel, \"convert_weight_packed\")'"
        continue-on-error: true

      - name: Run UT Cases
        if: steps.check_amx.outcome == 'success'
        timeout-minutes: 20
        run: |
          docker exec -w /sglang-checkout/ ci_sglang_xeon \
            bash -c "cd ./test/srt && python3 run_suite.py --suite per-commit-cpu"

      - name: Cleanup container
        if: always()
        run: |
          docker rm -f ci_sglang_xeon || true

  finish:
    if: always()
    needs: [build-test, unit-test]
    runs-on: ubuntu-24.04
    steps:
      - name: Check all dependent job statuses
        run: |
          results=(${{ join(needs.*.result, ' ') }})
          for result in "${results[@]}"; do
            if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then
              echo "Job failed with result: $result"
              exit 1
            fi
          done
          echo "All jobs completed successfully"
          exit 0
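
For reference, a sketch of reproducing the unit-test job above on a local Xeon machine, assuming the image is built from the repository root (the container name is illustrative):

    docker build . -f docker/Dockerfile.xeon -t sglang_xeon
    docker run -dt -v $(pwd):/sglang-checkout/ --ipc=host --name ci_sglang_xeon sglang_xeon
    docker exec -w /sglang-checkout/ ci_sglang_xeon \
        bash -c "cd ./test/srt && python3 run_suite.py --suite per-commit-cpu"
    docker rm -f ci_sglang_xeon
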
35 changes: 35 additions & 0 deletions .github/workflows/release-docker-xeon.yml
@@ -0,0 +1,35 @@
name: Release Docker Images
on:
  push:
    branches:
      - main
    paths:
      - "python/sglang/version.py"
  workflow_dispatch:

jobs:
  publish:
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-24.04
    environment: 'prod'
    strategy:
      matrix:
        build_type: ['all']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Build and Push
        run: |
          version=$(cat python/sglang/version.py | cut -d'"' -f2)
          tag=v${version}-xeon

          docker build . -f docker/Dockerfile.xeon -t lmsysorg/sglang:${tag} --no-cache
          docker push lmsysorg/sglang:${tag}
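
Because the workflow also declares workflow_dispatch, a release can be triggered by hand; for example with the GitHub CLI (assuming write access to the repository):

    gh workflow run release-docker-xeon.yml --repo sgl-project/sglang
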
44 changes: 44 additions & 0 deletions docker/Dockerfile.xeon
@@ -0,0 +1,44 @@
FROM ubuntu:24.04
SHELL ["/bin/bash", "-c"]

ARG VER_SGLANG=main
ARG VER_TORCH=2.6.0
ARG VER_TORCHVISION=0.21.0

RUN apt-get update && \
    apt-get full-upgrade -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
        ca-certificates \
        git \
        curl \
        wget \
        vim \
        gcc \
        g++ \
        make

WORKDIR /sgl-workspace

RUN curl -fsSL -o miniforge.sh https://github.com/conda-forge/miniforge/releases/download/24.11.3-2/Miniforge3-24.11.3-2-Linux-x86_64.sh && \
    bash miniforge.sh -b -p ./miniforge3 && \
    rm -f miniforge.sh && \
    . miniforge3/bin/activate && \
    conda install -y libsqlite==3.48.0 gperftools tbb libnuma numactl

ENV PATH=/sgl-workspace/miniforge3/bin:/sgl-workspace/miniforge3/condabin:${PATH}
ENV PIP_ROOT_USER_ACTION=ignore

RUN pip install intel-openmp

RUN git clone https://github.com/sgl-project/sglang.git && \
    cd sglang && \
    git checkout ${VER_SGLANG} && \
    pip install -e "python[all_cpu]" && \
    pip install torch==${VER_TORCH} torchvision==${VER_TORCHVISION} --index-url https://download.pytorch.org/whl/cpu --force-reinstall && \
    cd sgl-kernel && \
    cp pyproject_cpu.toml pyproject.toml && \
    pip install -v .

ENV LD_PRELOAD=/sgl-workspace/miniforge3/lib/libiomp5.so:/sgl-workspace/miniforge3/lib/libtcmalloc.so:/sgl-workspace/miniforge3/lib/libtbbmalloc.so.2

WORKDIR /sgl-workspace/sglang
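
A usage sketch for the resulting image; the tag is illustrative, not part of this PR:

    docker build . -f docker/Dockerfile.xeon -t sglang-xeon:dev
    docker run -it --rm --ipc=host sglang-xeon:dev bash

The LD_PRELOAD setting above loads Intel OpenMP and the tcmalloc/tbbmalloc allocators into every process in the container, the usual setup for CPU inference performance.
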
2 changes: 1 addition & 1 deletion python/pyproject.toml
@@ -89,7 +89,7 @@ srt_hpu = ["sglang[runtime_common]", "outlines>=0.0.44,<=0.1.11"]
# CPU: currently, there are no pre-built vllm wheels for CPU.
# To install vllm for CPU, please follow the instruction here:
# https://docs.vllm.ai/en/latest/getting_started/installation/cpu/index.html
srt_cpu = ["sglang[runtime_common]", "outlines>=0.0.44,<=0.1.11", "torch"]
srt_cpu = ["sglang[runtime_common]", "outlines>=0.0.44,<=0.1.11", "einops"]
# https://vllm-ascend.readthedocs.io/en/latest/installation.html
srt_npu = ["sglang[runtime_common]", "outlines>=0.0.44,<=0.1.11"]

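Context for the srt_cpu change: pip cannot attach an --index-url to a single dependency inside pyproject.toml, so torch is dropped from the extra and the CPU wheel is installed in a separate step (as Dockerfile.xeon above does). A minimal sketch of the resulting install flow from a checkout root:

    pip install -e "python[all_cpu]"
    pip install torch==2.6.0 torchvision==0.21.0 \
        --index-url https://download.pytorch.org/whl/cpu --force-reinstall
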
64 changes: 63 additions & 1 deletion python/sglang/test/test_utils.py
@@ -26,6 +26,7 @@
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
from sglang.srt.utils import (
    get_bool_env_var,
    get_device,
    is_port_available,
    kill_process_tree,
    retry,
@@ -305,13 +306,33 @@ def add_common_other_args_and_parse(parser: argparse.ArgumentParser):
    return args


def auto_config_device() -> str:
    """Auto-config available device platform"""

    try:
        device = get_device()
    except (RuntimeError, ImportError) as e:
        print(f"Warning: {e} - Falling back to CPU")
        device = "cpu"

    return device


def add_common_sglang_args_and_parse(parser: argparse.ArgumentParser):
    parser.add_argument("--parallel", type=int, default=64)
    parser.add_argument("--host", type=str, default="http://127.0.0.1")
    parser.add_argument("--port", type=int, default=30000)
    parser.add_argument("--backend", type=str, default="srt")
    parser.add_argument(
        "--device",
        type=str,
        default="auto",
        choices=["auto", "cuda", "rocm", "cpu"],
        help="Device type (auto/cuda/rocm/cpu). Auto will detect available platforms",
    )
    parser.add_argument("--result-file", type=str, default="result.jsonl")
    args = parser.parse_args()

    return args
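
A quick sanity check of the new helper outside the test harness, assuming sglang is installed:

    python3 -c "from sglang.test.test_utils import auto_config_device; print(auto_config_device())"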


@@ -397,11 +418,25 @@ def popen_launch_server(
    base_url: str,
    timeout: float,
    api_key: Optional[str] = None,
-    other_args: list[str] = (),
+    other_args: list[str] = [],
    env: Optional[dict] = None,
    return_stdout_stderr: Optional[tuple] = None,
    device: str = "auto",
    pd_separated: bool = False,
):
    """Launch a server process with automatic device detection.

    Args:
        device: Device type ("auto", "cuda", "rocm" or "cpu").
            If "auto", will detect available platforms automatically.
    """
    # Auto-detect device if needed
    if device == "auto":
        device = auto_config_device()
        print(f"Auto-configed device: {device}", flush=True)
    other_args = list(other_args)
    other_args += ["--device", str(device)]

    _, host, port = base_url.split(":")
    host = host[2:]

@@ -457,6 +492,15 @@ def popen_launch_server(
    start_time = time.perf_counter()
    with requests.Session() as session:
        while time.perf_counter() - start_time < timeout:

            return_code = process.poll()
            if return_code is not None:
                # Server failed to start (non-zero exit code) or crashed
                raise Exception(
                    f"Server process exited with code {return_code}. "
                    "Check server logs for errors."
                )

            try:
                headers = {
                    "Content-Type": "application/json; charset=utf-8",
@@ -627,6 +671,7 @@ def get_benchmark_args(
    disable_stream=False,
    disable_ignore_eos=False,
    seed: int = 0,
    device="auto",
    pd_separated: bool = False,
):
    return SimpleNamespace(
@@ -657,6 +702,7 @@ def get_benchmark_args(
        profile=None,
        lora_name=None,
        prompt_suffix="",
        device=device,
        pd_separated=pd_separated,
    )

@@ -676,7 +722,10 @@ def run_bench_serving(
    disable_ignore_eos=False,
    need_warmup=False,
    seed: int = 0,
    device="auto",
):
    if device == "auto":
        device = auto_config_device()
    # Launch the server
    base_url = DEFAULT_URL_FOR_TEST
    process = popen_launch_server(
@@ -700,6 +749,7 @@ def run_bench_serving(
        disable_stream=disable_stream,
        disable_ignore_eos=disable_ignore_eos,
        seed=seed,
        device=device,
    )

    try:
@@ -750,6 +800,18 @@ def run_bench_serving_multi(


def run_bench_one_batch(model, other_args):
    """Launch an offline benchmark process with automatic device detection.

    The device is always auto-detected here and forwarded to the command
    via "--device".
    """
    # Auto-detect device if needed
    device = auto_config_device()
    print(f"Auto-configed device: {device}", flush=True)
    other_args += ["--device", str(device)]

    command = [
        "python3",
        "-m",
10 changes: 10 additions & 0 deletions test/srt/run_suite.py
@@ -127,6 +127,16 @@ class TestFile:
"per-commit-8-gpu-amd": [
TestFile("test_full_deepseek_v3.py", 250),
],
"per-commit-cpu": [
TestFile("cpu/test_activation.py"),
TestFile("cpu/test_decode.py"),
TestFile("cpu/test_extend.py"),
TestFile("cpu/test_gemm.py"),
TestFile("cpu/test_moe.py"),
TestFile("cpu/test_norm.py"),
TestFile("cpu/test_qkv_proj_with_rope.py"),
TestFile("cpu/test_shared_expert.py"),
],
"nightly": [
TestFile("test_nightly_gsm8k_eval.py"),
],
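
Locally, the new suite runs the same way CI invokes it; a single file can also be run directly (a sketch, assuming the CPU build from Dockerfile.xeon):

    cd test/srt
    python3 run_suite.py --suite per-commit-cpu
    python3 cpu/test_gemm.py   # run one file directly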