diff --git a/docker/xeon.Dockerfile b/docker/xeon.Dockerfile index 52ca31e4379a..f793db49a9ef 100644 --- a/docker/xeon.Dockerfile +++ b/docker/xeon.Dockerfile @@ -6,6 +6,8 @@ ARG VER_SGLANG=main ARG VER_TORCH=2.9.0 ARG VER_TORCHVISION=0.24.0 +ARG VER_TORCHAUDIO=2.9.0 +ARG VER_TORCHAO=0.14.1 ARG VER_TRITON=3.5.0 RUN apt-get update && \ @@ -31,7 +33,7 @@ RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \ source $HOME/.local/bin/env && \ uv venv --python 3.12 -RUN echo -e '[[index]]\nname = "torch"\nurl = "https://download.pytorch.org/whl/cpu"\n\n[[index]]\nname = "torchvision"\nurl = "https://download.pytorch.org/whl/cpu"\n\n[[index]]\nname = "triton"\nurl = "https://download.pytorch.org/whl/cpu"' > .venv/uv.toml +RUN echo -e '[[index]]\nname = "torch"\nurl = "https://download.pytorch.org/whl/cpu"\n\n[[index]]\nname = "torchvision"\nurl = "https://download.pytorch.org/whl/cpu"\n\n[[index]]\nname = "torchaudio"\nurl = "https://download.pytorch.org/whl/cpu"\n\n[[index]]\nname = "triton"\nurl = "https://download.pytorch.org/whl/cpu"' > .venv/uv.toml ENV UV_CONFIG_FILE=/opt/.venv/uv.toml @@ -44,7 +46,7 @@ RUN source $HOME/.local/bin/env && \ cd python && \ cp pyproject_cpu.toml pyproject.toml && \ uv pip install . && \ - uv pip install torch==${VER_TORCH} torchvision==${VER_TORCHVISION} triton==${VER_TRITON} --force-reinstall && \ + uv pip install torch==${VER_TORCH} torchvision==${VER_TORCHVISION} torchaudio==${VER_TORCHAUDIO} torchao==${VER_TORCHAO} triton==${VER_TRITON} --force-reinstall && \ uv pip install tabulate && \ cd ../sgl-kernel && \ cp pyproject_cpu.toml pyproject.toml && \ diff --git a/docs/platforms/cpu_server.md b/docs/platforms/cpu_server.md index bd59a477b61b..6d6cce83cd70 100644 --- a/docs/platforms/cpu_server.md +++ b/docs/platforms/cpu_server.md @@ -92,6 +92,10 @@ url = "https://download.pytorch.org/whl/cpu" name = "torchvision" url = "https://download.pytorch.org/whl/cpu" +[[index]] +name = "torchaudio" +url = "https://download.pytorch.org/whl/cpu" + [[index]] name = "triton" url = "https://download.pytorch.org/whl/cpu" @@ -119,7 +123,7 @@ cp pyproject_cpu.toml pyproject.toml # Install SGLang dependent libs, and build SGLang main package uv pip install --upgrade pip setuptools uv pip install . -uv pip install torch==2.9.0 torchvision==0.24.0 triton==3.5.0 --force-reinstall +uv pip install torch==2.9.0 torchvision==0.24.0 torchaudio==2.9.0 torchao==0.14.1 triton==3.5.0 --force-reinstall # Build the CPU backend kernels cd ../sgl-kernel diff --git a/python/pyproject_cpu.toml b/python/pyproject_cpu.toml index 74e2b0c58de5..481f3b1f7e10 100644 --- a/python/pyproject_cpu.toml +++ b/python/pyproject_cpu.toml @@ -4,7 +4,7 @@ requires = ["setuptools>=61.0", "setuptools-scm>=8.0", "wheel", "grpcio-tools==1 build-backend = "setuptools.build_meta" [project] -name = "sglang" +name = "sglang-cpu" dynamic = ["version"] description = "SGLang is a fast serving framework for large language models and vision language models." readme = "README.md" diff --git a/python/sglang/srt/distributed/parallel_state.py b/python/sglang/srt/distributed/parallel_state.py index c63973689353..b01595526595 100644 --- a/python/sglang/srt/distributed/parallel_state.py +++ b/python/sglang/srt/distributed/parallel_state.py @@ -238,8 +238,21 @@ def __init__( self.cpu_group = None self.local_size = get_int_env_var("LOCAL_SIZE", 0) + if is_cuda_alike(): + device_id = ( + 0 if envs.SGLANG_ONE_VISIBLE_DEVICE_PER_PROCESS.get() else local_rank + ) + self.device = torch.device(f"cuda:{device_id}") + elif _is_npu: + self.device = torch.device(f"npu:{local_rank}") + elif _is_xpu: + self.device = torch.device(f"xpu:{local_rank}") + else: + self.device = torch.device("cpu") + self.device_module = torch.get_device_module(self.device) + for ranks in group_ranks: - active_ranks = torch.ones(len(ranks), dtype=torch.int32, device="cuda") + active_ranks = torch.ones(len(ranks), dtype=torch.int32, device=self.device) active_ranks_cpu = torch.ones(len(ranks), dtype=torch.int32) if "mooncake" in torch_distributed_backend: from mooncake.ep import MooncakeBackendOptions @@ -275,17 +288,6 @@ def __init__( assert self.cpu_group is not None assert self.device_group is not None - if is_cuda_alike(): - device_id = ( - 0 if envs.SGLANG_ONE_VISIBLE_DEVICE_PER_PROCESS.get() else local_rank - ) - self.device = torch.device(f"cuda:{device_id}") - elif _is_npu: - self.device = torch.device(f"npu:{local_rank}") - else: - self.device = torch.device("cpu") - self.device_module = torch.get_device_module(self.device) - # Import communicators self.use_pynccl = use_pynccl self.pynccl_use_current_stream = pynccl_use_current_stream diff --git a/sgl-kernel/pyproject_cpu.toml b/sgl-kernel/pyproject_cpu.toml index 9d7502a96a1f..90b86542beca 100644 --- a/sgl-kernel/pyproject_cpu.toml +++ b/sgl-kernel/pyproject_cpu.toml @@ -1,13 +1,13 @@ [build-system] requires = [ "scikit-build-core>=0.10", - "torch>=2.7.1", + "torch==2.9.0", "wheel", ] build-backend = "scikit_build_core.build" [project] -name = "sgl-kernel" +name = "sgl-kernel-cpu" version = "0.3.21" description = "Kernel Library for SGLang" readme = "README.md" @@ -33,3 +33,4 @@ exclude = [ cmake.source-dir = "csrc/cpu" cmake.build-type = "Release" minimum-version = "build-system.requires" +wheel.packages = ["python/sgl_kernel"]