Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions container/Dockerfile.vllm
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"

# Make sure to update the vllm dependency version in pyproject.toml whenever VLLM_REF is updated
ARG VLLM_REF="aab549870df50edf0512f0a59b574f692f546465" # from v0.10.1
ARG VLLM_REF="1da94e673c257373280026f75ceb4effac80e892" # from v0.10.1.1
ARG TORCH_BACKEND="cu128"

# Match 0.10.1 vLLM release
# https://github.com/vllm-project/vllm/releases/tag/v0.10.1
# Match 0.10.1.1 vLLM release
# https://github.com/vllm-project/vllm/releases/tag/v0.10.1.1
# DeepGEMM is pinned to the commit before https://github.com/deepseek-ai/DeepGEMM/pull/112, which seems to break on H100:
# "RuntimeError: Failed: CUDA runtime error csrc/jit/kernel_runtime.hpp:108 '98'"
ARG DEEPGEMM_REF="f85ec64"
Expand Down
4 changes: 2 additions & 2 deletions container/deps/vllm/install_vllm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ set -euo pipefail

# Parse arguments
EDITABLE=true
VLLM_REF="aab549870df50edf0512f0a59b574f692f546465" # from v0.10.1
VLLM_REF="1da94e673c257373280026f75ceb4effac80e892" # from v0.10.1.1
# When updating VLLM_REF above, make sure the precompiled wheel file URL is still correct. To list the available wheels, run:
# aws s3 ls s3://vllm-wheels/${VLLM_REF}/ --region us-west-2 --no-sign-request
VLLM_PRECOMPILED_WHEEL_LOCATION="https://vllm-wheels.s3.us-west-2.amazonaws.com/${VLLM_REF}/vllm-0.10.1-cp38-abi3-manylinux1_x86_64.whl"
VLLM_PRECOMPILED_WHEEL_LOCATION="https://vllm-wheels.s3.us-west-2.amazonaws.com/${VLLM_REF}/vllm-0.10.1.1-cp38-abi3-manylinux1_x86_64.whl"
VLLM_GIT_URL="https://github.com/vllm-project/vllm.git"
MAX_JOBS=16
INSTALLATION_DIR=/tmp
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ trtllm =[
vllm = [
"uvloop",
"nixl<=0.4.1",
"vllm[flashinfer]==0.10.1",
"vllm[flashinfer]==0.10.1.1",
]

sglang = [
Expand Down
Loading