Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,9 @@ COPY requirements/cuda.txt requirements/cuda.txt
COPY use_existing_torch.py use_existing_torch.py
COPY pyproject.toml pyproject.toml
RUN --mount=type=cache,target=/root/.cache/uv \
if [ "$(echo $CUDA_VERSION | cut -d. -f1)" = "13" ]; then \
sed -i 's/^flashinfer-python==/flashinfer-python[cu13]==/' requirements/cuda.txt; \
fi; \
if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
echo "Installing torch nightly..." \
&& uv pip install --python /opt/venv/bin/python3 torch torchaudio torchvision --pre \
Expand Down Expand Up @@ -616,14 +619,17 @@ ARG PYTORCH_CUDA_INDEX_BASE_URL
COPY requirements/common.txt /tmp/common.txt
COPY requirements/cuda.txt /tmp/requirements-cuda.txt
RUN --mount=type=cache,target=/root/.cache/uv \
if [ "$(echo $CUDA_VERSION | cut -d. -f1)" = "13" ]; then \
sed -i 's/^flashinfer-python==/flashinfer-python[cu13]==/' /tmp/requirements-cuda.txt; \
fi; \
uv pip install --system -r /tmp/requirements-cuda.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') && \
rm /tmp/requirements-cuda.txt /tmp/common.txt

# Install FlashInfer JIT cache (requires CUDA-version-specific index URL)
# https://docs.flashinfer.ai/installation.html
# From versions.json: .flashinfer.version
ARG FLASHINFER_VERSION=0.6.8.post1
ARG FLASHINFER_VERSION=0.6.11
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --system flashinfer-jit-cache==${FLASHINFER_VERSION} \
--extra-index-url https://flashinfer.ai/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
Expand Down
4 changes: 2 additions & 2 deletions docker/Dockerfile.nightly_torch
Original file line number Diff line number Diff line change
Expand Up @@ -217,13 +217,13 @@ RUN pip install setuptools==75.6.0 packaging==23.2 ninja==1.11.1.3 build==1.2.2.


# build flashinfer for torch nightly from source around 10 mins
# release version: v0.6.8.post1
# release version: v0.6.11
# todo(elainewy): cache flashinfer build result for faster build
ENV CCACHE_DIR=/root/.cache/ccache
RUN --mount=type=cache,target=/root/.cache/ccache \
--mount=type=cache,target=/root/.cache/uv \
echo "git clone flashinfer..." \
&& git clone --depth 1 --branch v0.6.8.post1 --recursive https://github.com/flashinfer-ai/flashinfer.git \
&& git clone --depth 1 --branch v0.6.11 --recursive https://github.com/flashinfer-ai/flashinfer.git \
&& cd flashinfer \
&& git submodule update --init --recursive \
&& echo "finish git clone flashinfer..." \
Expand Down
2 changes: 1 addition & 1 deletion docker/versions.json
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
"default": "true"
},
"FLASHINFER_VERSION": {
"default": "0.6.8.post1"
"default": "0.6.11"
},
"GDRCOPY_CUDA_VERSION": {
"default": "12.8"
Expand Down
4 changes: 2 additions & 2 deletions requirements/cuda.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ torchaudio==2.11.0
# These must be updated alongside torch
torchvision==0.26.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
# FlashInfer should be updated together with the Dockerfile
flashinfer-python==0.6.8.post1
flashinfer-cubin==0.6.8.post1
flashinfer-python==0.6.11
flashinfer-cubin==0.6.11
apache-tvm-ffi==0.1.9
tilelang==0.1.9
# Cap nvidia-cudnn-frontend (transitive dep of flashinfer) due to
Expand Down
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -965,6 +965,8 @@ def _read_requirements(filename: str) -> list[str]:
cuda_major, cuda_minor = torch.version.cuda.split(".")
modified_requirements = []
for req in requirements:
if cuda_major == "13" and req.startswith("flashinfer-python=="):
req = req.replace("flashinfer-python==", "flashinfer-python[cu13]==", 1)
if "vllm-flash-attn" in req and cuda_major != "12":
# vllm-flash-attn is built only for CUDA 12.x.
# Skip for other versions.
Expand Down
Loading