Skip to content
Merged
6 changes: 4 additions & 2 deletions docker/xeon.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ ARG VER_SGLANG=main

ARG VER_TORCH=2.9.0
ARG VER_TORCHVISION=0.24.0
ARG VER_TORCHAUDIO=2.9.0
ARG VER_TORCHAO=0.14.1
ARG VER_TRITON=3.5.0

RUN apt-get update && \
Expand All @@ -31,7 +33,7 @@ RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
source $HOME/.local/bin/env && \
uv venv --python 3.12

RUN echo -e '[[index]]\nname = "torch"\nurl = "https://download.pytorch.org/whl/cpu"\n\n[[index]]\nname = "torchvision"\nurl = "https://download.pytorch.org/whl/cpu"\n\n[[index]]\nname = "triton"\nurl = "https://download.pytorch.org/whl/cpu"' > .venv/uv.toml
RUN echo -e '[[index]]\nname = "torch"\nurl = "https://download.pytorch.org/whl/cpu"\n\n[[index]]\nname = "torchvision"\nurl = "https://download.pytorch.org/whl/cpu"\n\n[[index]]\nname = "torchaudio"\nurl = "https://download.pytorch.org/whl/cpu"\n\n[[index]]\nname = "triton"\nurl = "https://download.pytorch.org/whl/cpu"' > .venv/uv.toml

ENV UV_CONFIG_FILE=/opt/.venv/uv.toml

Expand All @@ -44,7 +46,7 @@ RUN source $HOME/.local/bin/env && \
cd python && \
cp pyproject_cpu.toml pyproject.toml && \
uv pip install . && \
uv pip install torch==${VER_TORCH} torchvision==${VER_TORCHVISION} triton==${VER_TRITON} --force-reinstall && \
uv pip install torch==${VER_TORCH} torchvision==${VER_TORCHVISION} torchaudio==${VER_TORCHAUDIO} torchao==${VER_TORCHAO} triton==${VER_TRITON} --force-reinstall && \
uv pip install tabulate && \
cd ../sgl-kernel && \
cp pyproject_cpu.toml pyproject.toml && \
Expand Down
6 changes: 5 additions & 1 deletion docs/platforms/cpu_server.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ url = "https://download.pytorch.org/whl/cpu"
name = "torchvision"
url = "https://download.pytorch.org/whl/cpu"

[[index]]
name = "torchaudio"
url = "https://download.pytorch.org/whl/cpu"

[[index]]
name = "triton"
url = "https://download.pytorch.org/whl/cpu"
Expand Down Expand Up @@ -119,7 +123,7 @@ cp pyproject_cpu.toml pyproject.toml
# Install SGLang dependent libs, and build SGLang main package
uv pip install --upgrade pip setuptools
uv pip install .
uv pip install torch==2.9.0 torchvision==0.24.0 triton==3.5.0 --force-reinstall
uv pip install torch==2.9.0 torchvision==0.24.0 torchaudio==2.9.0 torchao==0.14.1 triton==3.5.0 --force-reinstall

# Build the CPU backend kernels
cd ../sgl-kernel
Expand Down
2 changes: 1 addition & 1 deletion python/pyproject_cpu.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ requires = ["setuptools>=61.0", "setuptools-scm>=8.0", "wheel", "grpcio-tools==1
build-backend = "setuptools.build_meta"

[project]
name = "sglang"
name = "sglang-cpu"
dynamic = ["version"]
description = "SGLang is a fast serving framework for large language models and vision language models."
readme = "README.md"
Expand Down
26 changes: 14 additions & 12 deletions python/sglang/srt/distributed/parallel_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,8 +238,21 @@ def __init__(
self.cpu_group = None
self.local_size = get_int_env_var("LOCAL_SIZE", 0)

if is_cuda_alike():
device_id = (
0 if envs.SGLANG_ONE_VISIBLE_DEVICE_PER_PROCESS.get() else local_rank
)
self.device = torch.device(f"cuda:{device_id}")
elif _is_npu:
self.device = torch.device(f"npu:{local_rank}")
elif _is_xpu:
self.device = torch.device(f"xpu:{local_rank}")
else:
self.device = torch.device("cpu")
self.device_module = torch.get_device_module(self.device)

for ranks in group_ranks:
active_ranks = torch.ones(len(ranks), dtype=torch.int32, device="cuda")
active_ranks = torch.ones(len(ranks), dtype=torch.int32, device=self.device)
active_ranks_cpu = torch.ones(len(ranks), dtype=torch.int32)
if "mooncake" in torch_distributed_backend:
from mooncake.ep import MooncakeBackendOptions
Expand Down Expand Up @@ -275,17 +288,6 @@ def __init__(
assert self.cpu_group is not None
assert self.device_group is not None

if is_cuda_alike():
device_id = (
0 if envs.SGLANG_ONE_VISIBLE_DEVICE_PER_PROCESS.get() else local_rank
)
self.device = torch.device(f"cuda:{device_id}")
elif _is_npu:
self.device = torch.device(f"npu:{local_rank}")
else:
self.device = torch.device("cpu")
self.device_module = torch.get_device_module(self.device)

# Import communicators
self.use_pynccl = use_pynccl
self.pynccl_use_current_stream = pynccl_use_current_stream
Expand Down
5 changes: 3 additions & 2 deletions sgl-kernel/pyproject_cpu.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
[build-system]
requires = [
"scikit-build-core>=0.10",
"torch>=2.7.1",
"torch==2.9.0",
"wheel",
]
build-backend = "scikit_build_core.build"

[project]
name = "sgl-kernel"
name = "sgl-kernel-cpu"
version = "0.3.21"
description = "Kernel Library for SGLang"
readme = "README.md"
Expand All @@ -33,3 +33,4 @@ exclude = [
cmake.source-dir = "csrc/cpu"
cmake.build-type = "Release"
minimum-version = "build-system.requires"
wheel.packages = ["python/sgl_kernel"]
Loading