Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
ARG CUDA_VERSION=13.0.2
ARG PYTHON_VERSION=3.12
ARG UBUNTU_VERSION=22.04
ARG FLASHINFER_VERSION=0.6.10

# By parameterizing the base images, we allow third-party to use their own
# base images. One use case is hermetic builds with base images stored in
Expand Down Expand Up @@ -101,6 +102,7 @@ FROM ${BUILD_BASE_IMAGE} AS base

ARG CUDA_VERSION
ARG PYTHON_VERSION
ARG FLASHINFER_VERSION
ARG BUILD_OS

ENV DEBIAN_FRONTEND=noninteractive
Expand Down Expand Up @@ -212,6 +214,14 @@ RUN --mount=type=cache,target=/root/.cache/uv \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
fi

# `flashinfer-python` is already installed via requirements/cuda.txt above;
# this only activates its `[cu13]` extra (cu13 deps for the SM100 GDN kernel).
# Guard compares the major component of CUDA_VERSION (e.g. "13.0.2" -> "13"),
# so the extra is skipped entirely on cu12 builds.
RUN --mount=type=cache,target=/root/.cache/uv \
    if [ "${CUDA_VERSION%%.*}" = "13" ]; then \
        uv pip install --python /opt/venv/bin/python3 \
            "flashinfer-python[cu13]==${FLASHINFER_VERSION}"; \
    fi

# Track PyTorch lib versions used during build and match in downstream instances.
# We do this for both nightly and release so we can strip dependencies/*.txt as needed.
# Otherwise library dependencies can upgrade/downgrade torch incorrectly.
Expand Down Expand Up @@ -519,6 +529,7 @@ FROM ${FINAL_BASE_IMAGE} AS vllm-base

ARG CUDA_VERSION
ARG PYTHON_VERSION
ARG FLASHINFER_VERSION
ARG DEADSNAKES_MIRROR_URL
ARG DEADSNAKES_GPGKEY_URL
ARG GET_PIP_URL
Expand Down Expand Up @@ -620,10 +631,17 @@ RUN --mount=type=cache,target=/root/.cache/uv \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') && \
rm /tmp/requirements-cuda.txt /tmp/common.txt

# `flashinfer-python` is already installed via requirements/cuda.txt above;
# this only activates its `[cu13]` extra (cu13 deps for the SM100 GDN kernel).
# Mirrors the build-stage step, but targets the system interpreter of the
# final image (`--system`) rather than the /opt/venv used during the build.
RUN --mount=type=cache,target=/root/.cache/uv \
    if [ "${CUDA_VERSION%%.*}" = "13" ]; then \
        uv pip install --system \
            "flashinfer-python[cu13]==${FLASHINFER_VERSION}"; \
    fi

# Install FlashInfer JIT cache (requires CUDA-version-specific index URL)
# https://docs.flashinfer.ai/installation.html
# From versions.json: .flashinfer.version
#
# NOTE: FLASHINFER_VERSION is already declared for this stage (inheriting the
# global default from versions.json). Do NOT redeclare it here with a pinned
# default — a stale `ARG FLASHINFER_VERSION=0.6.8.post1` at this point would
# silently override the inherited value for the remainder of the stage, making
# flashinfer-jit-cache (0.6.8.post1) mismatch the flashinfer-python install
# above (0.6.10) whenever no --build-arg is supplied.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --system flashinfer-jit-cache==${FLASHINFER_VERSION} \
        --extra-index-url https://flashinfer.ai/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
Expand Down
4 changes: 2 additions & 2 deletions docker/Dockerfile.nightly_torch
Original file line number Diff line number Diff line change
Expand Up @@ -217,13 +217,13 @@ RUN pip install setuptools==75.6.0 packaging==23.2 ninja==1.11.1.3 build==1.2.2.


# build flashinfer for torch nightly from source around 10 mins
# release version: v0.6.8.post1
# release version: v0.6.10
# todo(elainewy): cache flashinfer build result for faster build
ENV CCACHE_DIR=/root/.cache/ccache
RUN --mount=type=cache,target=/root/.cache/ccache \
--mount=type=cache,target=/root/.cache/uv \
echo "git clone flashinfer..." \
&& git clone --depth 1 --branch v0.6.8.post1 --recursive https://github.com/flashinfer-ai/flashinfer.git \
&& git clone --depth 1 --branch v0.6.10 --recursive https://github.com/flashinfer-ai/flashinfer.git \
&& cd flashinfer \
&& git submodule update --init --recursive \
&& echo "finish git clone flashinfer..." \
Expand Down
6 changes: 3 additions & 3 deletions docker/versions.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
"UBUNTU_VERSION": {
"default": "22.04"
},
"FLASHINFER_VERSION": {
"default": "0.6.10"
},
"BUILD_BASE_IMAGE": {
"default": "nvidia/cuda:13.0.2-devel-ubuntu22.04"
},
Expand Down Expand Up @@ -67,9 +70,6 @@
"RUN_WHEEL_CHECK": {
"default": "true"
},
"FLASHINFER_VERSION": {
"default": "0.6.8.post1"
},
"GDRCOPY_CUDA_VERSION": {
"default": "12.8"
},
Expand Down
4 changes: 2 additions & 2 deletions requirements/cuda.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ torchaudio==2.11.0
# These must be updated alongside torch
torchvision==0.26.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
# FlashInfer should be updated together with the Dockerfile
flashinfer-python==0.6.8.post1
flashinfer-cubin==0.6.8.post1
flashinfer-python==0.6.10
flashinfer-cubin==0.6.10
apache-tvm-ffi==0.1.9
tilelang==0.1.9
# Cap nvidia-cudnn-frontend (transitive dep of flashinfer) due to
Expand Down
8 changes: 8 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -969,6 +969,14 @@ def _read_requirements(filename: str) -> list[str]:
# vllm-flash-attn is built only for CUDA 12.x.
# Skip for other versions.
continue
if req.startswith("flashinfer-python") and cuda_major == "13":
# Activate FI's `[cu13]` extra on cu13 builds (cu13 deps for
# the SM100 GDN kernel). Mirrors the Dockerfile cu13 path.
req = req.replace(
"flashinfer-python",
"flashinfer-python[cu13]",
1,
)
modified_requirements.append(req)
requirements = modified_requirements
elif _is_hip():
Expand Down
Loading