sgl-project · Fridge003 · Feb 15, 2026 · Feb 15, 2026 · Feb 16, 2026 · Feb 16, 2026
@@ -34,9 +34,28 @@ jobs:
       - name: Checkout code
         uses: actions/checkout@v4
 
+      - name: Build sgl-kernel locally for PyTorch 2.10 compatibility
+        timeout-minutes: 30
+        run: |
+          # Install CMake 3.26+ required by sgl-kernel (quote specifiers: a bare '>' is a shell redirect)
+          pip install "cmake>=3.26"
+          cmake --version
+          cd sgl-kernel
+          pip install "scikit-build-core>=0.10" ninja wheel numpy uv
+          pip install torch==2.10.0 --index-url https://download.pytorch.org/whl/cu129
+          make build
+          # Rename wheel to match expected pattern (linux_x86_64 -> manylinux2014_x86_64)
+          for whl in dist/sgl_kernel-*-linux_x86_64.whl; do
+            if [ -f "$whl" ]; then
+              newname=$(echo "$whl" | sed 's/linux_x86_64/manylinux2014_x86_64/')
+              mv "$whl" "$newname"
+              echo "Renamed $whl to $newname"
+            fi
+          done
+
       - name: Install dependencies
         run: |
-          bash scripts/ci/cuda/ci_install_dependency.sh
+          CUSTOM_BUILD_SGL_KERNEL=true bash scripts/ci/cuda/ci_install_dependency.sh
           pip install -r docs/requirements.txt
           apt-get update && apt-get install -y pandoc parallel retry
           ln -sf "$(which python3)" /usr/bin/python

@@ -804,13 +804,15 @@ jobs:
   # =============================================== jit-kernel ====================================================
 
   jit-kernel-unit-test:
-    needs: [check-changes, call-gate]
+    needs: [check-changes, call-gate, sgl-kernel-build-wheels]
     # Skip for scheduled runs and when target_stage is set
     if: |
+      always() &&
       github.event_name != 'schedule' &&
       inputs.test_parallel_dispatch != true &&
       !inputs.target_stage &&
-      needs.check-changes.outputs.jit_kernel == 'true'
+      needs.check-changes.outputs.jit_kernel == 'true' &&
+      (needs.sgl-kernel-build-wheels.result == 'success' || needs.sgl-kernel-build-wheels.result == 'skipped')
     runs-on: 1-gpu-runner
     timeout-minutes: 240
     env:
@@ -820,10 +822,43 @@ jobs:
         with:
           ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
 
+      - name: Clean up disk
+        run: |
+          ls -alh sgl-kernel/dist || true
+          rm -rf sgl-kernel/dist/* || true
+
+      - name: Download artifacts
+        if: needs.check-changes.outputs.sgl_kernel == 'true'
+        uses: actions/download-artifact@v4
+        with:
+          path: sgl-kernel/dist/
+          merge-multiple: true
+          pattern: wheel-python3.10-cuda12.9
+
+      - name: Build sgl-kernel locally if wheel not available
+        if: needs.check-changes.outputs.sgl_kernel != 'true'
+        timeout-minutes: 30
+        run: |
+          # Install CMake 3.26+ required by sgl-kernel (quote specifiers: a bare '>' is a shell redirect)
+          pip install "cmake>=3.26"
+          cmake --version
+          cd sgl-kernel
+          pip install "scikit-build-core>=0.10" ninja wheel numpy uv
+          pip install torch==2.10.0 --index-url https://download.pytorch.org/whl/cu129
+          make build
+          # Rename wheel to match expected pattern (linux_x86_64 -> manylinux2014_x86_64)
+          for whl in dist/sgl_kernel-*-linux_x86_64.whl; do
+            if [ -f "$whl" ]; then
+              newname=$(echo "$whl" | sed 's/linux_x86_64/manylinux2014_x86_64/')
+              mv "$whl" "$newname"
+              echo "Renamed $whl to $newname"
+            fi
+          done
+
       - name: Install dependencies
         timeout-minutes: 20
         run: |
-          bash scripts/ci/cuda/ci_install_dependency.sh
+          CUSTOM_BUILD_SGL_KERNEL=true bash scripts/ci/cuda/ci_install_dependency.sh
 
       - name: Run test
         timeout-minutes: 30

@@ -65,10 +65,10 @@ dependencies = [
   "tiktoken",
   "timm==1.0.16",
   "torch_memory_saver==0.0.9",
-  "torch==2.9.1",
+  "torch==2.10.0",
   "torchao==0.9.0",
-  "torchaudio==2.9.1",
-  "torchcodec==0.9.1 ; sys_platform != 'linux' or (sys_platform == 'linux' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l')", # torchcodec 0.9.1 for torch 2.9.x. Not available on Linux ARM.
+  "torchaudio==2.10.0",
+  "torchcodec==0.9.1 ; sys_platform != 'linux' or (sys_platform == 'linux' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l')",
   "av ; sys_platform == 'linux' and (platform_machine == 'aarch64' or platform_machine == 'arm64' or platform_machine == 'armv7l')",
   "torchvision",
   "tqdm",

diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py
@@ -11,6 +11,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+# NOTE: PyTorch 2.10.0 still requires cuDNN >= 9.15 for Conv3D operations.
+# check_torch_cudnn_compatibility() enforces this at startup for multimodal models,
+# and CI installs nvidia-cudnn-cu12==9.16.0.29 to avoid the performance regression.
 """The arguments of the server."""
 
 from __future__ import annotations
@@ -5718,14 +5721,12 @@ def check_server_args(self):
             f"Invalid value: '{self.served_model_name}'"
         )
 
+        # Check cuDNN compatibility for PyTorch 2.10+
+        self.check_torch_cudnn_compatibility()
+
         # Check LoRA
         self.check_lora_server_args()
 
-        # torch 2.9.1 has compatibility issues with cuDNN 9.14 and below,
-        # causing extremely slow nn.Conv3d performance.
-        # TODO(yhyang201): Remove this check when sglang no longer uses torch 2.9.1.
-        self.check_torch_2_9_1_cudnn_compatibility()
-
         # Check speculative decoding
         if self.speculative_algorithm is not None:
             assert (
@@ -5834,48 +5835,61 @@ def check_server_args(self):
                 "When enabling two batch overlap, moe_a2a_backend cannot be 'none'."
             )
 
-    def check_torch_2_9_1_cudnn_compatibility(self):
+    def check_torch_cudnn_compatibility(self):
+        """Check cuDNN compatibility for PyTorch 2.10.x.
+
+        PyTorch 2.10.0 ships with cuDNN 9.10.2.21, which has a Conv3D performance regression.
+        For multimodal models, raises RuntimeError when the detected cuDNN is older than 9.15.
+        """
         if get_bool_env_var("SGLANG_DISABLE_CUDNN_CHECK"):
             return
 
-        if self.get_model_config().is_multimodal:
-            import torch
+        # Only check for multimodal models which use Conv3D
+        if (
+            not hasattr(self, "get_model_config")
+            or not self.get_model_config().is_multimodal
+        ):
+            return
+
+        import torch
 
-            if torch_release[:3] == (2, 9, 1):
+        torch_version = torch.__version__.split("+", 1)[0]
+        # Check for PyTorch 2.10.0+ (which has the cuDNN issue)
+        if torch_version.startswith("2.10"):
+            cudnn_version = None
+            try:
+                cudnn_version = torch.backends.cudnn.version()
+            except Exception:
                 cudnn_version = None
-                try:
-                    cudnn_version = torch.backends.cudnn.version()
-                except Exception:
-                    cudnn_version = None
-                if cudnn_version is not None:
-                    version_float = float(str(cudnn_version)[:3]) / 100
-                    if version_float < 9.15:
-                        RED = "\033[91m"
-                        BOLD = "\033[1m"
-                        RESET = "\033[0m"
-                        msg = (
-                            f"{RED}{BOLD}"
-                            "CRITICAL WARNING: PyTorch 2.9.1 & CuDNN Compatibility Issue Detected\n"
-                            "--------------------------------------------------------------------------------\n"
-                            f"Current Environment: PyTorch {torch.__version__} | CuDNN {version_float:.2f}\n\n"
-                            "Issue:     There is a KNOWN BUG in PyTorch 2.9.1's `nn.Conv3d` implementation\n"
-                            "           when used with CuDNN versions older than 9.15. This can cause\n"
-                            "           SEVERE PERFORMANCE DEGRADATION and EXCESSIVE MEMORY USAGE.\n\n"
-                            "Reference: https://github.com/pytorch/pytorch/issues/168167\n\n"
-                            "Solution:  You MUST upgrade CuDNN to version 9.15+ to ensure correctness.\n\n"
-                            "Run the following command immediately to fix:\n"
-                            "    pip install nvidia-cudnn-cu12==9.16.0.29\n\n"
-                            "Or you can disable this check by setting env var SGLANG_DISABLE_CUDNN_CHECK=1\n"
-                            "--------------------------------------------------------------------------------\n"
-                            f"{RESET}"
-                        )
-                        raise RuntimeError(msg)
-                else:
+            if cudnn_version is not None:
+                version_float = float(str(cudnn_version)[:3]) / 100
+                if version_float < 9.15:
                     RED = "\033[91m"
+                    BOLD = "\033[1m"
                     RESET = "\033[0m"
-                    logger.warning(
-                        f"{RED}WARNING: Could not determine CuDNN version for torch==2.9.1. Please ensure CuDNN >= 9.15 to avoid nn.Conv3d bugs.{RESET}"
+                    msg = (
+                        f"{RED}{BOLD}"
+                        "CRITICAL WARNING: PyTorch 2.10.0 & CuDNN Compatibility Issue Detected\n"
+                        "--------------------------------------------------------------------------------\n"
+                        f"Current Environment: PyTorch {torch.__version__} | CuDNN {version_float:.2f}\n\n"
+                        "Issue:     PyTorch 2.10.0 ships with cuDNN 9.10.2.21 which has a KNOWN BUG\n"
+                        "           in `nn.Conv3d` implementation causing SEVERE PERFORMANCE DEGRADATION\n"
+                        "           and EXCESSIVE MEMORY USAGE in multimodal models.\n\n"
+                        "Solution:  You MUST upgrade CuDNN to version 9.15+ to ensure correctness.\n\n"
+                        "Run the following command immediately to fix:\n"
+                        "    pip install nvidia-cudnn-cu12==9.16.0.29\n\n"
+                        "Or you can disable this check by setting env var SGLANG_DISABLE_CUDNN_CHECK=1\n"
+                        "--------------------------------------------------------------------------------\n"
+                        f"{RESET}"
                     )
+                    raise RuntimeError(msg)
+            else:
+                RED = "\033[91m"
+                RESET = "\033[0m"
+                logger.warning(
+                    f"{RED}WARNING: Could not determine CuDNN version for torch==2.10.0. "
+                    f"Please ensure CuDNN >= 9.15 to avoid nn.Conv3d bugs.{RESET}"
+                )
 
     def check_lora_server_args(self):
         assert self.max_loras_per_batch > 0, "max_loras_per_batch must be positive"

diff --git a/scripts/ci/cuda/ci_install_dependency.sh b/scripts/ci/cuda/ci_install_dependency.sh
@@ -156,6 +156,7 @@ if [ -n "$OPTIONAL_DEPS" ]; then
 fi
 echo "Installing python extras: [${EXTRAS}]"
 
+$PIP_CMD uninstall torch || true
 $PIP_CMD install -e "python[${EXTRAS}]" --extra-index-url https://download.pytorch.org/whl/${CU_VERSION} $PIP_INSTALL_SUFFIX
 
 # Fix CUDA version mismatch between torch and torchaudio.
@@ -261,6 +262,15 @@ if [ "$IS_BLACKWELL" = "1" ]; then
 else
     $PIP_CMD install nvidia-cudnn-cu12==9.16.0.29 --force-reinstall $PIP_INSTALL_SUFFIX
 fi
+
+# Prefer the pip-installed cuDNN over PyTorch's bundled copy: PyTorch 2.10+ ships cuDNN 9.10.2.21 (slow Conv3D).
+# The package directory itself (<site-packages>/nvidia/cudnn) holds lib/, so resolve it via __path__, not dirname.
+CUDNN_PATH=$(python3 -c "import nvidia.cudnn; print(list(nvidia.cudnn.__path__)[0])" 2>/dev/null || echo "")
+if [ -n "$CUDNN_PATH" ] && [ -d "$CUDNN_PATH/lib" ]; then
+    export LD_LIBRARY_PATH="$CUDNN_PATH/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+    echo "Set LD_LIBRARY_PATH to use pip-installed cuDNN: $CUDNN_PATH/lib"
+fi
+
 $PIP_CMD uninstall xformers || true
 
 # Install flashinfer-jit-cache with caching and retry logic (flashinfer.ai can have transient DNS issues)

@@ -79,10 +79,10 @@ RUN set -eux; \
 RUN --mount=type=cache,id=sgl-kernel-pip,target=/root/.cache/pip \
     set -eux; \
     case "${CUDA_VERSION}" in \
-      13.0) TORCH_VER=2.9.1; CU_TAG=cu130 ;; \
-      12.9) TORCH_VER=2.9.1; CU_TAG=cu128 ;; \
-      12.8) TORCH_VER=2.9.1; CU_TAG=cu128 ;; \
-      *)    TORCH_VER=2.9.1; CU_TAG=cu126 ;; \
+      13.0) TORCH_VER=2.10.0; CU_TAG=cu130 ;; \
+      12.9) TORCH_VER=2.10.0; CU_TAG=cu128 ;; \
+      12.8) TORCH_VER=2.10.0; CU_TAG=cu128 ;; \
+      *)    TORCH_VER=2.10.0; CU_TAG=cu126 ;; \
     esac; \
     ${PYTHON_ROOT_PATH}/bin/pip install torch==${TORCH_VER} --index-url https://${PYTORCH_MIRROR}/whl/${CU_TAG}; \
     ${PYTHON_ROOT_PATH}/bin/pip install ninja setuptools==75.0.0 wheel==0.41.0 numpy uv scikit-build-core --index-url ${PIP_DEFAULT_INDEX}

@@ -12,7 +12,7 @@
 sgl-kernel provides optimized compute primitives for LLM inference engines, enabling efficient inference for large language models and vision-language models through custom kernel operations. It has been used by [LightLLM](https://github.com/ModelTC/LightLLM), [SGLang](https://github.com/sgl-project/sglang) and so on.
 
 ## Installation
-Requires torch == 2.9.1
+Requires torch == 2.10.0
 
 ```bash
 # Latest version