Skip to content

Commit 26aac03

Browse files
dmitry-tokarev-nv authored and Jason Zhou committed
chore: vllm 0.10.1.1 (#2641)
Signed-off-by: Jason Zhou <[email protected]>
1 parent 878afde commit 26aac03

File tree

3 files changed

+12
-12
lines changed

3 files changed

+12
-12
lines changed

container/Dockerfile.vllm

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -13,11 +13,11 @@ ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
1313
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
1414

1515
# Make sure to update the dependency version in pyproject.toml when updating this
16-
ARG VLLM_REF="aab549870df50edf0512f0a59b574f692f546465" # from v0.10.1
16+
ARG VLLM_REF="1da94e673c257373280026f75ceb4effac80e892" # from v0.10.1.1
1717
ARG TORCH_BACKEND="cu128"
1818

19-
# Match 0.10.1 vLLM release
20-
# https://github.com/vllm-project/vllm/releases/tag/v0.10.1
19+
# Match 0.10.1.1 vLLM release
20+
# https://github.com/vllm-project/vllm/releases/tag/v0.10.1.1
2121
# Pinned to commit before https://github.com/deepseek-ai/DeepGEMM/pull/112 for DeepGEMM which seems to break on H100:
2222
# "RuntimeError: Failed: CUDA runtime error csrc/jit/kernel_runtime.hpp:108 '98'"
2323
ARG DEEPGEMM_REF="f85ec64"

container/deps/vllm/install_vllm.sh

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -20,10 +20,10 @@ set -euo pipefail
2020

2121
# Parse arguments
2222
EDITABLE=true
23-
VLLM_REF="aab549870df50edf0512f0a59b574f692f546465" # from v0.10.1
23+
VLLM_REF="1da94e673c257373280026f75ceb4effac80e892" # from v0.10.1.1
2424
# When updating above VLLM_REF make sure precompiled wheel file URL is correct. Run this command:
2525
# aws s3 ls s3://vllm-wheels/${VLLM_REF}/ --region us-west-2 --no-sign-request
26-
VLLM_PRECOMPILED_WHEEL_LOCATION="https://vllm-wheels.s3.us-west-2.amazonaws.com/${VLLM_REF}/vllm-0.10.1-cp38-abi3-manylinux1_x86_64.whl"
26+
VLLM_PRECOMPILED_WHEEL_LOCATION="https://vllm-wheels.s3.us-west-2.amazonaws.com/${VLLM_REF}/vllm-0.10.1.1-cp38-abi3-manylinux1_x86_64.whl"
2727
VLLM_GIT_URL="https://github.com/vllm-project/vllm.git"
2828
MAX_JOBS=16
2929
INSTALLATION_DIR=/tmp
@@ -86,13 +86,13 @@ while [[ $# -gt 0 ]]; do
8686
echo "Options:"
8787
echo " --editable Install vllm in editable mode (default)"
8888
echo " --no-editable Install vllm in non-editable mode"
89-
echo f" --vllm-ref REF Git reference to checkout (default: ${VLLM_REF})"
90-
echo f" --max-jobs NUM Maximum number of parallel jobs (default: ${MAX_JOBS})"
89+
echo " --vllm-ref REF Git reference to checkout (default: ${VLLM_REF})"
90+
echo " --max-jobs NUM Maximum number of parallel jobs (default: ${MAX_JOBS})"
9191
echo " --arch ARCH Architecture (amd64|arm64, default: auto-detect)"
92-
echo f" --installation-dir DIR Directory to install vllm (default: ${INSTALLATION_DIR})"
93-
echo f" --deepgemm-ref REF Git reference for DeepGEMM (default: ${DEEPGEMM_REF})"
94-
echo f" --flashinf-ref REF Git reference for Flash Infer (default: ${FLASHINF_REF})"
95-
echo f" --torch-backend BACKEND Torch backend to use (default: ${TORCH_BACKEND})"
92+
echo " --installation-dir DIR Directory to install vllm (default: ${INSTALLATION_DIR})"
93+
echo " --deepgemm-ref REF Git reference for DeepGEMM (default: ${DEEPGEMM_REF})"
94+
echo " --flashinf-ref REF Git reference for Flash Infer (default: ${FLASHINF_REF})"
95+
echo " --torch-backend BACKEND Torch backend to use (default: ${TORCH_BACKEND})"
9696
exit 0
9797
;;
9898
*)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ trtllm =[
5555
vllm = [
5656
"uvloop",
5757
"nixl<=0.4.1",
58-
"vllm[flashinfer]==0.10.1",
58+
"vllm[flashinfer]==0.10.1.1",
5959
]
6060

6161
sglang = [

0 commit comments

Comments
 (0)