container/Dockerfile.vllm: 3 additions & 3 deletions
@@ -13,11 +13,11 @@ ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
 ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
 
 # Make sure to update the dependency version in pyproject.toml when updating this
-ARG VLLM_REF="aab549870df50edf0512f0a59b574f692f546465" # from v0.10.1
+ARG VLLM_REF="1da94e673c257373280026f75ceb4effac80e892" # from v0.10.1.1
 ARG TORCH_BACKEND="cu128"
 
-# Match 0.10.1 vLLM release
-# https://github.com/vllm-project/vllm/releases/tag/v0.10.1
+# Match 0.10.1.1 vLLM release
+# https://github.com/vllm-project/vllm/releases/tag/v0.10.1.1
 # Pinned to commit before https://github.com/deepseek-ai/DeepGEMM/pull/112 for DeepGEMM which seems to break on H100:
 # "RuntimeError: Failed: CUDA runtime error csrc/jit/kernel_runtime.hpp:108 '98'"
 ARG DEEPGEMM_REF="f85ec64"
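VLLM_REF pins an exact commit taken from the v0.10.1.1 release rather than the tag itself. As a sanity check when reviewing a bump like this (a sketch, not part of the PR), git ls-remote shows which commit the upstream tag dereferences to; the peeled ^{} entry should match the pinned ref if the pin tracks the tag exactly:

$ git ls-remote --tags https://github.com/vllm-project/vllm.git | grep v0.10.1.1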
container/deps/vllm/install_vllm.sh: 8 additions & 8 deletions
@@ -20,10 +20,10 @@ set -euo pipefail
 
 # Parse arguments
 EDITABLE=true
-VLLM_REF="aab549870df50edf0512f0a59b574f692f546465" # from v0.10.1
+VLLM_REF="1da94e673c257373280026f75ceb4effac80e892" # from v0.10.1.1
 # When updating above VLLM_REF make sure precompiled wheel file URL is correct. Run this command:
 # aws s3 ls s3://vllm-wheels/${VLLM_REF}/ --region us-west-2 --no-sign-request
-VLLM_PRECOMPILED_WHEEL_LOCATION="https://vllm-wheels.s3.us-west-2.amazonaws.com/${VLLM_REF}/vllm-0.10.1-cp38-abi3-manylinux1_x86_64.whl"
+VLLM_PRECOMPILED_WHEEL_LOCATION="https://vllm-wheels.s3.us-west-2.amazonaws.com/${VLLM_REF}/vllm-0.10.1.1-cp38-abi3-manylinux1_x86_64.whl"
 VLLM_GIT_URL="https://github.com/vllm-project/vllm.git"
 MAX_JOBS=16
 INSTALLATION_DIR=/tmp
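The script's own comment gives the aws s3 ls check for the wheel. A lighter-weight alternative (a sketch, assuming the bucket remains publicly readable over HTTPS) is a HEAD request against the exact wheel URL; curl -f exits non-zero if the object is missing:

$ VLLM_REF="1da94e673c257373280026f75ceb4effac80e892"
$ curl -sfI "https://vllm-wheels.s3.us-west-2.amazonaws.com/${VLLM_REF}/vllm-0.10.1.1-cp38-abi3-manylinux1_x86_64.whl" | head -n 1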
@@ -86,13 +86,13 @@ while [[ $# -gt 0 ]]; do
             echo "Options:"
             echo "  --editable              Install vllm in editable mode (default)"
             echo "  --no-editable           Install vllm in non-editable mode"
-            echo f"  --vllm-ref REF          Git reference to checkout (default: ${VLLM_REF})"
-            echo f"  --max-jobs NUM          Maximum number of parallel jobs (default: ${MAX_JOBS})"
+            echo "  --vllm-ref REF          Git reference to checkout (default: ${VLLM_REF})"
+            echo "  --max-jobs NUM          Maximum number of parallel jobs (default: ${MAX_JOBS})"
             echo "  --arch ARCH             Architecture (amd64|arm64, default: auto-detect)"
-            echo f"  --installation-dir DIR  Directory to install vllm (default: ${INSTALLATION_DIR})"
-            echo f"  --deepgemm-ref REF      Git reference for DeepGEMM (default: ${DEEPGEMM_REF})"
-            echo f"  --flashinf-ref REF      Git reference for Flash Infer (default: ${FLASHINF_REF})"
-            echo f"  --torch-backend BACKEND Torch backend to use (default: ${TORCH_BACKEND})"
+            echo "  --installation-dir DIR  Directory to install vllm (default: ${INSTALLATION_DIR})"
+            echo "  --deepgemm-ref REF      Git reference for DeepGEMM (default: ${DEEPGEMM_REF})"
+            echo "  --flashinf-ref REF      Git reference for Flash Infer (default: ${FLASHINF_REF})"
+            echo "  --torch-backend BACKEND Torch backend to use (default: ${TORCH_BACKEND})"
             exit 0
             ;;
         *)
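The rest of this hunk is a bug fix rather than part of the version bump: several echo lines carried a Python-style f"..." prefix. Bash has no f-strings; it concatenates the bare f with the adjacent quoted string, so the help output printed a stray leading f. A minimal reproduction of the old behavior:

$ echo f"  --vllm-ref REF          Git reference to checkout"
f  --vllm-ref REF          Git reference to checkout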
pyproject.toml: 1 addition & 1 deletion
@@ -55,7 +55,7 @@ trtllm =[
 vllm = [
     "uvloop",
     "nixl<=0.4.1",
-    "vllm[flashinfer]==0.10.1",
+    "vllm[flashinfer]==0.10.1.1",
 ]
 
 sglang = [
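This keeps the Python dependency pin in sync with the Dockerfile and install script, as the Dockerfile comment requires. Assuming this table sits under [project.optional-dependencies] (not shown in the hunk), consumers pick up the new patch release on the next resolve, e.g.:

$ pip install -e ".[vllm]"            # repo extra, hypothetical layout
$ pip install "vllm[flashinfer]==0.10.1.1"   # or the pinned package directly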