sgl-project · Fridge003 · Jan 22, 2026 · Jan 21, 2026 · Jan 21, 2026 · Jan 21, 2026
@@ -6,6 +6,8 @@ ARG VER_SGLANG=main
 
 ARG VER_TORCH=2.9.0
 ARG VER_TORCHVISION=0.24.0
+ARG VER_TORCHAUDIO=2.9.0
+ARG VER_TORCHAO=0.14.1
 ARG VER_TRITON=3.5.0
 
 RUN apt-get update && \
@@ -31,7 +33,7 @@ RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
     source $HOME/.local/bin/env && \
     uv venv --python 3.12
 
-RUN echo -e '[[index]]\nname = "torch"\nurl = "https://download.pytorch.org/whl/cpu"\n\n[[index]]\nname = "torchvision"\nurl = "https://download.pytorch.org/whl/cpu"\n\n[[index]]\nname = "triton"\nurl = "https://download.pytorch.org/whl/cpu"' > .venv/uv.toml
+RUN echo -e '[[index]]\nname = "torch"\nurl = "https://download.pytorch.org/whl/cpu"\n\n[[index]]\nname = "torchvision"\nurl = "https://download.pytorch.org/whl/cpu"\n\n[[index]]\nname = "torchaudio"\nurl = "https://download.pytorch.org/whl/cpu"\n\n[[index]]\nname = "triton"\nurl = "https://download.pytorch.org/whl/cpu"' > .venv/uv.toml
 
 ENV UV_CONFIG_FILE=/opt/.venv/uv.toml
 
@@ -44,7 +46,7 @@ RUN source $HOME/.local/bin/env && \
     cd python && \
     cp pyproject_cpu.toml pyproject.toml && \
     uv pip install . && \
-    uv pip install torch==${VER_TORCH} torchvision==${VER_TORCHVISION} triton==${VER_TRITON} --force-reinstall && \
+    uv pip install torch==${VER_TORCH} torchvision==${VER_TORCHVISION} torchaudio==${VER_TORCHAUDIO} torchao==${VER_TORCHAO} triton==${VER_TRITON} --force-reinstall && \
     uv pip install tabulate && \
     cd ../sgl-kernel && \
     cp pyproject_cpu.toml pyproject.toml && \

diff --git a/docs/platforms/cpu_server.md b/docs/platforms/cpu_server.md
@@ -92,6 +92,10 @@ url = "https://download.pytorch.org/whl/cpu"
 name = "torchvision"
 url = "https://download.pytorch.org/whl/cpu"
 
+[[index]]
+name = "torchaudio"
+url = "https://download.pytorch.org/whl/cpu"
+
 [[index]]
 name = "triton"
 url = "https://download.pytorch.org/whl/cpu"
@@ -119,7 +123,7 @@ cp pyproject_cpu.toml pyproject.toml
 # Install SGLang dependent libs, and build SGLang main package
 uv pip install --upgrade pip setuptools
 uv pip install .
-uv pip install torch==2.9.0 torchvision==0.24.0 triton==3.5.0 --force-reinstall
+uv pip install torch==2.9.0 torchvision==0.24.0 torchaudio==2.9.0 torchao==0.14.1 triton==3.5.0 --force-reinstall
 
 # Build the CPU backend kernels
 cd ../sgl-kernel

diff --git a/python/pyproject_cpu.toml b/python/pyproject_cpu.toml
@@ -4,7 +4,7 @@ requires = ["setuptools>=61.0", "setuptools-scm>=8.0", "wheel", "grpcio-tools==1
 build-backend = "setuptools.build_meta"
 
 [project]
-name = "sglang"
+name = "sglang-cpu"
 dynamic = ["version"]
 description = "SGLang is a fast serving framework for large language models and vision language models."
 readme = "README.md"

@@ -238,8 +238,21 @@ def __init__(
         self.cpu_group = None
         self.local_size = get_int_env_var("LOCAL_SIZE", 0)
 
+        if is_cuda_alike():
+            device_id = (
+                0 if envs.SGLANG_ONE_VISIBLE_DEVICE_PER_PROCESS.get() else local_rank
+            )
+            self.device = torch.device(f"cuda:{device_id}")
+        elif _is_npu:
+            self.device = torch.device(f"npu:{local_rank}")
+        elif _is_xpu:
+            self.device = torch.device(f"xpu:{local_rank}")
+        else:
+            self.device = torch.device("cpu")
+        self.device_module = torch.get_device_module(self.device)
+
         for ranks in group_ranks:
-            active_ranks = torch.ones(len(ranks), dtype=torch.int32, device="cuda")
+            active_ranks = torch.ones(len(ranks), dtype=torch.int32, device=self.device)
             active_ranks_cpu = torch.ones(len(ranks), dtype=torch.int32)
             if "mooncake" in torch_distributed_backend:
                 from mooncake.ep import MooncakeBackendOptions
@@ -275,17 +288,6 @@ def __init__(
         assert self.cpu_group is not None
         assert self.device_group is not None
 
-        if is_cuda_alike():
-            device_id = (
-                0 if envs.SGLANG_ONE_VISIBLE_DEVICE_PER_PROCESS.get() else local_rank
-            )
-            self.device = torch.device(f"cuda:{device_id}")
-        elif _is_npu:
-            self.device = torch.device(f"npu:{local_rank}")
-        else:
-            self.device = torch.device("cpu")
-        self.device_module = torch.get_device_module(self.device)
-
         # Import communicators
         self.use_pynccl = use_pynccl
         self.pynccl_use_current_stream = pynccl_use_current_stream

@@ -1,13 +1,13 @@
 [build-system]
 requires = [
   "scikit-build-core>=0.10",
-  "torch>=2.7.1",
+  "torch==2.9.0",
   "wheel",
 ]
 build-backend = "scikit_build_core.build"
 
 [project]
-name = "sgl-kernel"
+name = "sgl-kernel-cpu"
 version = "0.3.21"
 description = "Kernel Library for SGLang"
 readme = "README.md"
@@ -33,3 +33,4 @@ exclude = [
 cmake.source-dir = "csrc/cpu"
 cmake.build-type = "Release"
 minimum-version = "build-system.requires"
+wheel.packages = ["python/sgl_kernel"]