vllm-project · AethoceSora · Apr 27, 2026 · Apr 27, 2026 · Apr 30, 2026 · May 8, 2026
@@ -199,6 +199,9 @@ COPY requirements/cuda.txt requirements/cuda.txt
 COPY use_existing_torch.py use_existing_torch.py
 COPY pyproject.toml pyproject.toml
 RUN --mount=type=cache,target=/root/.cache/uv \
+    if [ "$(echo $CUDA_VERSION | cut -d. -f1)" = "13" ]; then \
+        sed -i 's/^flashinfer-python==/flashinfer-python[cu13]==/' requirements/cuda.txt; \
+    fi; \
     if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
         echo "Installing torch nightly..." \
         && uv pip install --python /opt/venv/bin/python3 torch torchaudio torchvision --pre \
@@ -616,14 +619,19 @@ ARG PYTORCH_CUDA_INDEX_BASE_URL
 COPY requirements/common.txt /tmp/common.txt
 COPY requirements/cuda.txt /tmp/requirements-cuda.txt
+# When the major component of CUDA_VERSION is 13, rewrite the flashinfer-python
+# pin to request its "cu13" extra; a failed rewrite aborts the install below.
 RUN --mount=type=cache,target=/root/.cache/uv \
+    if [ "$(echo "$CUDA_VERSION" | cut -d. -f1)" = "13" ]; then \
+        sed -i 's/^flashinfer-python==/flashinfer-python[cu13]==/' /tmp/requirements-cuda.txt; \
+    fi && \
     uv pip install --system -r /tmp/requirements-cuda.txt \
         --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') && \
     rm /tmp/requirements-cuda.txt /tmp/common.txt
 
 # Install FlashInfer JIT cache (requires CUDA-version-specific index URL)
 # https://docs.flashinfer.ai/installation.html
 # From versions.json: .flashinfer.version
-ARG FLASHINFER_VERSION=0.6.8.post1
+ARG FLASHINFER_VERSION=0.6.11
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install --system flashinfer-jit-cache==${FLASHINFER_VERSION} \
         --extra-index-url https://flashinfer.ai/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')

diff --git a/docker/Dockerfile.nightly_torch b/docker/Dockerfile.nightly_torch
@@ -217,13 +217,13 @@ RUN pip install setuptools==75.6.0 packaging==23.2 ninja==1.11.1.3 build==1.2.2.
 
 
 # build flashinfer for torch nightly from source around 10 mins
-# release version: v0.6.8.post1
+# release version: v0.6.11
 # todo(elainewy): cache flashinfer build result for faster build
 ENV CCACHE_DIR=/root/.cache/ccache
 RUN --mount=type=cache,target=/root/.cache/ccache \
     --mount=type=cache,target=/root/.cache/uv \
     echo "git clone flashinfer..." \
-    && git clone --depth 1 --branch v0.6.8.post1 --recursive https://github.com/flashinfer-ai/flashinfer.git \
+    && git clone --depth 1 --branch v0.6.11 --recursive https://github.com/flashinfer-ai/flashinfer.git \
     && cd flashinfer \
     && git submodule update --init --recursive \
     && echo "finish git clone flashinfer..." \

diff --git a/docker/versions.json b/docker/versions.json
@@ -68,7 +68,7 @@
       "default": "true"
     },
     "FLASHINFER_VERSION": {
-      "default": "0.6.8.post1"
+      "default": "0.6.11"
     },
     "GDRCOPY_CUDA_VERSION": {
       "default": "12.8"

diff --git a/requirements/cuda.txt b/requirements/cuda.txt
@@ -9,8 +9,8 @@ torchaudio==2.11.0
 # These must be updated alongside torch
 torchvision==0.26.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
 # FlashInfer should be updated together with the Dockerfile
-flashinfer-python==0.6.8.post1
-flashinfer-cubin==0.6.8.post1
+flashinfer-python==0.6.11
+flashinfer-cubin==0.6.11
 apache-tvm-ffi==0.1.9
 tilelang==0.1.9
 # Cap nvidia-cudnn-frontend (transitive dep of flashinfer) due to

diff --git a/setup.py b/setup.py
@@ -965,6 +965,8 @@ def _read_requirements(filename: str) -> list[str]:
         cuda_major, cuda_minor = torch.version.cuda.split(".")
         modified_requirements = []
         for req in requirements:
+            if cuda_major == "13" and req.startswith("flashinfer-python=="):
+                req = req.replace("flashinfer-python==", "flashinfer-python[cu13]==", 1)
             if "vllm-flash-attn" in req and cuda_major != "12":
                 # vllm-flash-attn is built only for CUDA 12.x.
                 # Skip for other versions.