sgl-project · zhyncs · Apr 29, 2025 · Apr 15, 2025 · Apr 15, 2025 · Apr 15, 2025
diff --git a/docs/start/install.md b/docs/start/install.md
@@ -159,6 +159,21 @@ sky status --endpoint 30000 sglang
 3. To further scale up your deployment with autoscaling and failure recovery, check out the [SkyServe + SGLang guide](https://github.com/skypilot-org/skypilot/tree/master/llm/sglang#serving-llama-2-with-sglang-for-more-traffic-using-skyserve).
 </details>
 
+## Method 7: Compatibility with GH200/GB200 and Jetson for Ubuntu 22.04 and Ubuntu 24.04
+
+#### SBSA
+```bash
+pip install --upgrade pip
+pip install uv
+uv pip install flashinfer-python "sglang[all]>=0.4.5" --index-url https://pypi.jetson-ai-lab.dev/sbsa/cu128
+```
+#### Jetson JetPack 6
+```bash
+pip install --upgrade pip
+pip install uv
+uv pip install flashinfer-python "sglang[all]>=0.4.5" --index-url https://pypi.jetson-ai-lab.dev/jp6/cu126
+```
+
 ## Common Notes
 
 - [FlashInfer](https://github.com/flashinfer-ai/flashinfer) is the default attention kernel backend. It only supports sm75 and above. If you encounter any FlashInfer-related issues on sm75+ devices (e.g., T4, A10, A100, L4, L40S, H100), please switch to other kernels by adding `--attention-backend triton --sampling-backend pytorch` and open an issue on GitHub.

diff --git a/python/pyproject.toml b/python/pyproject.toml
@@ -47,9 +47,9 @@ runtime_common = [
 srt = [
     "sglang[runtime_common]",
     "sgl-kernel==0.0.9.post2",
-    "flashinfer_python==0.2.3",
-    "torch==2.6.0",
-    "torchvision==0.21.0",
+    "flashinfer_python>=0.2.3",
+    "torch>=2.6.0",
+    "torchvision>=0.21.0",
     "cuda-python",
     "outlines>=0.0.44,<=0.1.11",
     "partial_json_parser",

diff --git a/python/sglang/srt/platforms/interface.py b/python/sglang/srt/platforms/interface.py
@@ -312,6 +312,7 @@ def get_cpu_architecture(cls) -> CpuArchEnum:
 
         return CpuArchEnum.OTHER if machine else CpuArchEnum.UNKNOWN
 
+
     @classmethod
     def is_pin_memory_available(cls) -> bool:
         """Checks whether pin memory is available on the current platform."""

@@ -5,6 +5,9 @@ PYTHON_VERSION=$1
 CUDA_VERSION=$2
 PYTHON_ROOT_PATH=/opt/python/cp${PYTHON_VERSION//.}-cp${PYTHON_VERSION//.}
 
+ARCH=$(uname -m)  # machine name (x86_64 / aarch64); 'uname -i' is non-portable and may print "unknown"
+echo "ARCH:  $ARCH"
+
 if [ ${CUDA_VERSION} = "12.8" ]; then
    DOCKER_IMAGE="pytorch/manylinux2_28-builder:cuda${CUDA_VERSION}"
    TORCH_INSTALL="pip install --no-cache-dir --pre torch --index-url https://download.pytorch.org/whl/nightly/cu${CUDA_VERSION//.}"
@@ -20,10 +23,10 @@ docker run --rm \
    # Install CMake (version >= 3.26) - Robust Installation
    export CMAKE_VERSION_MAJOR=3.31
    export CMAKE_VERSION_MINOR=1
-   echo \"Downloading CMake from: https://cmake.org/files/v\${CMAKE_VERSION_MAJOR}/cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-x86_64.tar.gz\"
-   wget https://cmake.org/files/v\${CMAKE_VERSION_MAJOR}/cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-x86_64.tar.gz
-   tar -xzf cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-x86_64.tar.gz
-   mv cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-x86_64 /opt/cmake
+   echo \"Downloading CMake from: https://cmake.org/files/v\${CMAKE_VERSION_MAJOR}/cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-${ARCH}.tar.gz\"
+   wget https://cmake.org/files/v\${CMAKE_VERSION_MAJOR}/cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-${ARCH}.tar.gz
+   tar -xzf cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-${ARCH}.tar.gz
+   mv cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-${ARCH} /opt/cmake
    export PATH=/opt/cmake/bin:\$PATH
 
    # Debugging CMake
@@ -35,8 +38,9 @@ docker run --rm \
    ${PYTHON_ROOT_PATH}/bin/pip install --no-cache-dir ninja setuptools==75.0.0 wheel==0.41.0 numpy uv scikit-build-core && \
    export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0+PTX' && \
    export CUDA_VERSION=${CUDA_VERSION} && \
-   mkdir -p /usr/lib/x86_64-linux-gnu/ && \
-   ln -s /usr/local/cuda-${CUDA_VERSION}/targets/x86_64-linux/lib/stubs/libcuda.so /usr/lib/x86_64-linux-gnu/libcuda.so && \
+   mkdir -p /usr/lib/${ARCH}-linux-gnu/ && \
+   ln -s /usr/local/cuda-${CUDA_VERSION}/targets/x86_64-linux/lib/stubs/libcuda.so /usr/lib/${ARCH}-linux-gnu/libcuda.so && \
+
    cd /sgl-kernel && \
    ls -la ${PYTHON_ROOT_PATH}/lib/python${PYTHON_VERSION}/site-packages/wheel/ && \
    PYTHONPATH=${PYTHON_ROOT_PATH}/lib/python${PYTHON_VERSION}/site-packages ${PYTHON_ROOT_PATH}/bin/python -m uv build --wheel -Cbuild-dir=build . --color=always --no-build-isolation && \

@@ -2,12 +2,13 @@
 import os
 
 import torch
+import platform
 
-if os.path.exists("/usr/local/cuda/targets/x86_64-linux/lib/libcudart.so.12"):
-    ctypes.CDLL(
-        "/usr/local/cuda/targets/x86_64-linux/lib/libcudart.so.12",
-        mode=ctypes.RTLD_GLOBAL,
-    )
+SYSTEM_ARCH = platform.machine()
+
+cuda_path = f"/usr/local/cuda/targets/{SYSTEM_ARCH}-linux/lib/libcudart.so.12"
+if os.path.exists(cuda_path):
+    ctypes.CDLL(cuda_path, mode=ctypes.RTLD_GLOBAL)
 
 from sgl_kernel import common_ops
 from sgl_kernel.allreduce import *

@@ -16,6 +16,7 @@
 import os
 import shutil
 import sys
+import platform
 from pathlib import Path
 
 import torch
@@ -24,10 +25,19 @@
 from torch.utils.cpp_extension import BuildExtension, CppExtension
 
 root = Path(__file__).parent.resolve()
+arch = platform.machine().lower()
 
-if "bdist_wheel" in sys.argv and "--plat-name" not in sys.argv:
-    sys.argv.extend(["--plat-name", "manylinux2014_x86_64"])
+if arch in ("x86_64", "amd64"):
+    plat_name = "manylinux2014_x86_64"
+elif arch in ("aarch64", "arm64"):
+    plat_name = "manylinux2014_aarch64"
+elif arch.startswith("ppc"):
+    plat_name = "manylinux2014_ppc64le"
+else:
+    plat_name = f"manylinux2014_{arch}"
 
+if "bdist_wheel" in sys.argv and "--plat-name" not in sys.argv:
+    sys.argv.extend(["--plat-name", plat_name])
 
 def _get_version():
     with open(root / "pyproject.toml") as f:
@@ -70,7 +80,7 @@ def _get_version():
 }
 Extension = CppExtension
 
-extra_link_args = ["-Wl,-rpath,$ORIGIN/../../torch/lib", "-L/usr/lib/x86_64-linux-gnu"]
+extra_link_args = ["-Wl,-rpath,$ORIGIN/../../torch/lib", f"-L/usr/lib/{arch}-linux-gnu"]
 
 ext_modules = [
     Extension(

@@ -14,12 +14,14 @@
 # ==============================================================================
 
 import sys
+import platform
 from pathlib import Path
 
 from setuptools import find_packages, setup
 from torch.utils.cpp_extension import BuildExtension, CUDAExtension
 
 root = Path(__file__).parent.resolve()
+arch = platform.machine().lower()
 
 
 def _get_version():
@@ -45,7 +47,7 @@ def _get_version():
 
 cxx_flags = ["-O3"]
 libraries = ["hiprtc", "amdhip64", "c10", "torch", "torch_python"]
-extra_link_args = ["-Wl,-rpath,$ORIGIN/../../torch/lib", "-L/usr/lib/x86_64-linux-gnu"]
+extra_link_args = ["-Wl,-rpath,$ORIGIN/../../torch/lib", f"-L/usr/lib/{arch}-linux-gnu"]
 
 hipcc_flags = [
     "-DNDEBUG",