ray-project · nrghosh · Jan 17, 2026 · Jan 18, 2026 · Jan 19, 2026 · Jan 21, 2026
diff --git a/ci/compile_llm_requirements.sh b/ci/compile_llm_requirements.sh
@@ -6,9 +6,9 @@ CONFIG_PATH="${1:-ci/raydepsets/configs/rayllm.depsets.yaml}"
 
 mkdir -p /tmp/ray-deps
 
-# Remove the GPU constraints
+# Remove the GPU constraints and the numpy, scipy, and pandas pins (vLLM 0.15.0+ requires numpy>=2, a compatible scipy, and pandas>=2.0)
 cp python/requirements_compiled.txt /tmp/ray-deps/requirements_compiled.txt
-sed -e '/^--extra-index-url /d' -e '/^--find-links /d' /tmp/ray-deps/requirements_compiled.txt > /tmp/ray-deps/requirements_compiled.txt.tmp
+sed -e '/^--extra-index-url /d' -e '/^--find-links /d' -e '/^numpy==/d' -e '/^scipy==/d' -e '/^pandas==/d' /tmp/ray-deps/requirements_compiled.txt > /tmp/ray-deps/requirements_compiled.txt.tmp
 mv /tmp/ray-deps/requirements_compiled.txt.tmp /tmp/ray-deps/requirements_compiled.txt
 
 bazel run //ci/raydepsets:raydepsets -- build "${CONFIG_PATH}"

diff --git a/ci/raydepsets/configs/llm_release_tests.depsets.yaml b/ci/raydepsets/configs/llm_release_tests.depsets.yaml
@@ -8,8 +8,10 @@ build_arg_sets:
   append_flags:
     - --python-version=3.11
     - --unsafe-package ray
-    - --python-platform=linux
+    # Use manylinux_2_31 for vllm 0.15.0 wheel compatibility
+    - --python-platform=x86_64-manylinux_2_31
     - --extra-index-url https://download.pytorch.org/whl/${CUDA_CODE}
+    - --override python/requirements/llm/llm-override.txt
   build_arg_sets:
     - cu128
 

diff --git a/ci/raydepsets/configs/rayllm.depsets.yaml b/ci/raydepsets/configs/rayllm.depsets.yaml
@@ -11,8 +11,10 @@ build_arg_sets:
   append_flags:
     - --python-version=3.11
     - --unsafe-package ray
-    - --python-platform=linux
+    # Use manylinux_2_31 for vllm 0.15.0 wheel compatibility
+    - --python-platform=x86_64-manylinux_2_31
     - --extra-index-url https://download.pytorch.org/whl/${CUDA_CODE}
+    - --override python/requirements/llm/llm-override.txt
   build_arg_sets:
     - cpu
     - cu128

diff --git a/ci/raydepsets/pre_hooks/remove-compiled-headers.sh b/ci/raydepsets/pre_hooks/remove-compiled-headers.sh
@@ -12,7 +12,7 @@ fi
 
 mkdir -p /tmp/ray-deps
 
-# Remove the GPU constraints
+# Remove the GPU constraints and the numpy, scipy, and pandas pins (vLLM 0.15.0+ requires numpy>=2, a compatible scipy, and pandas>=2.0)
 cp "python/${FILENAME}" "/tmp/ray-deps/${FILENAME}"
-sed -e '/^--extra-index-url /d' -e '/^--find-links /d' "/tmp/ray-deps/${FILENAME}" > "/tmp/ray-deps/${FILENAME}.tmp"
+sed -e '/^--extra-index-url /d' -e '/^--find-links /d' -e '/^numpy==/d' -e '/^scipy==/d' -e '/^pandas==/d' "/tmp/ray-deps/${FILENAME}" > "/tmp/ray-deps/${FILENAME}.tmp"
 mv "/tmp/ray-deps/${FILENAME}.tmp" "/tmp/ray-deps/${FILENAME}"
@@ -42,7 +42,7 @@ def _testing_build_dp_openai_app(builder_config, **kwargs):
 # Configure the model with data parallel settings
 config = LLMConfig(
     model_loading_config={
-        "model_id": "Qwen/Qwen2.5-0.5B-Instruct"
+        "model_id": "microsoft/Phi-tiny-MoE-instruct"
     },
     engine_kwargs={
         "data_parallel_size": 2,  # Number of DP replicas

@@ -57,7 +57,7 @@ def _testing_build_dp_deployment(llm_config, **kwargs):
 # Configure prefill with data parallel attention
 prefill_config = LLMConfig(
     model_loading_config={
-        "model_id": "Qwen/Qwen2.5-0.5B-Instruct"
+        "model_id": "microsoft/Phi-tiny-MoE-instruct"
     },
     engine_kwargs={
         "data_parallel_size": 2,  # 2 DP replicas for prefill
@@ -78,7 +78,7 @@ def _testing_build_dp_deployment(llm_config, **kwargs):
 # Configure decode with data parallel attention
 decode_config = LLMConfig(
     model_loading_config={
-        "model_id": "Qwen/Qwen2.5-0.5B-Instruct"
+        "model_id": "microsoft/Phi-tiny-MoE-instruct"
     },
     engine_kwargs={
         "data_parallel_size": 2,  # 2 DP replicas for decode (adjusted for 4 GPU limit)

diff --git a/docker/ray-llm/Dockerfile b/docker/ray-llm/Dockerfile
@@ -7,7 +7,7 @@ COPY python/deplocks/llm/rayllm_*.lock ./
 
 # vLLM version tag to use for EP kernel and DeepGEMM install scripts
 # Keep in sync with vllm version in python/requirements/llm/llm-requirements.txt
-ARG VLLM_SCRIPTS_REF="v0.12.0"
+ARG VLLM_SCRIPTS_REF="v0.15.0"
 
 RUN <<EOF
 #!/bin/bash
@@ -44,7 +44,9 @@ export UV_SYSTEM_PYTHON=1
 export TORCH_CUDA_ARCH_LIST="9.0a 10.0a"
 
 # Install EP kernels (PPLX, DeepEP, and NVSHMEM)
-curl -fsSL "${VLLM_RAW}/tools/ep_kernels/install_python_libraries.sh" | bash -s -- --workspace /home/ray/llm_ep_support
+# Use nvshmem 3.3.20, which was compiled with CUDA 12.8, to stay compatible with the Ray LLM image (also built with CUDA 12.8)
+curl -fsSL "${VLLM_RAW}/tools/ep_kernels/install_python_libraries.sh" | \
+    bash -s -- --workspace /home/ray/llm_ep_support --nvshmem-ver 3.3.20
 
 # Install DeepGEMM
 curl -fsSL "${VLLM_RAW}/tools/install_deepgemm.sh" | bash

diff --git a/python/deplocks/base_deps/ray_base_deps_py3.10.lock b/python/deplocks/base_deps/ray_base_deps_py3.10.lock
@@ -472,9 +472,8 @@ cython==0.29.37 \
     # via
     #   -c python/deplocks/base_extra_testdeps/ray-base_extra_testdeps_py3.10.lock
     #   -r docker/base-deps/requirements.in
-flatbuffers==23.5.26 \
-    --hash=sha256:9ea1144cac05ce5d86e2859f431c6cd5e66cd9c78c558317c7955fb8d4c78d89 \
-    --hash=sha256:c0ff356da363087b915fde4b8b45bdda73432fc17cddb3c8157472eab1422ad1
+flatbuffers==25.12.19 \
+    --hash=sha256:7634f50c427838bb021c2d66a3d1168e9d199b0607e6329399f04846d42e20b4
     # via
     #   -c python/deplocks/base_extra_testdeps/ray-base_extra_testdeps_py3.10.lock
     #   -r docker/base-deps/requirements.in
@@ -566,9 +565,9 @@ fsspec==2023.12.1 \
     # via
     #   -c python/deplocks/base_extra_testdeps/ray-base_extra_testdeps_py3.10.lock
     #   adlfs
-google-api-core==2.24.2 \
-    --hash=sha256:810a63ac95f3c441b7c0e43d344e372887f62ce9071ba972eacf32672e072de9 \
-    --hash=sha256:81718493daf06d96d6bc76a91c23874dbf2fac0adbbf542831b805ee6e974696
+google-api-core==2.29.0 \
+    --hash=sha256:84181be0f8e6b04006df75ddfe728f24489f0af57c96a529ff7cf45bc28797f7 \
+    --hash=sha256:d30bc60980daa36e314b5d5a3e5958b0200cb44ca8fa1be2b614e932b75a3ea9
     # via
     #   -c python/deplocks/base_extra_testdeps/ray-base_extra_testdeps_py3.10.lock
     #   google-api-python-client
@@ -692,9 +691,9 @@ google-resumable-media==2.6.0 \
     # via
     #   -c python/deplocks/base_extra_testdeps/ray-base_extra_testdeps_py3.10.lock
     #   google-cloud-storage
-googleapis-common-protos==1.61.0 \
-    --hash=sha256:22f1915393bb3245343f6efe87f6fe868532efc12aa26b391b15132e1279f1c0 \
-    --hash=sha256:8a64866a97f6304a7179873a465d6eee97b7a24ec6cfd78e0f575e96b821240b
+googleapis-common-protos==1.72.0 \
+    --hash=sha256:4299c5a82d5ae1a9702ada957347726b167f9f8d1fc352477702a1e851ff4038 \
+    --hash=sha256:e55a601c1b32b52d7a3e65f43563e2aa61bcd737998ee672ac9b951cd49319f5
     # via
     #   -c python/deplocks/base_extra_testdeps/ray-base_extra_testdeps_py3.10.lock
     #   google-api-core

diff --git a/python/deplocks/base_deps/ray_base_deps_py3.11.lock b/python/deplocks/base_deps/ray_base_deps_py3.11.lock
@@ -466,9 +466,8 @@ cython==0.29.37 \
     # via
     #   -c python/deplocks/base_extra_testdeps/ray-base_extra_testdeps_py3.11.lock
     #   -r docker/base-deps/requirements.in
-flatbuffers==23.5.26 \
-    --hash=sha256:9ea1144cac05ce5d86e2859f431c6cd5e66cd9c78c558317c7955fb8d4c78d89 \
-    --hash=sha256:c0ff356da363087b915fde4b8b45bdda73432fc17cddb3c8157472eab1422ad1
+flatbuffers==25.12.19 \
+    --hash=sha256:7634f50c427838bb021c2d66a3d1168e9d199b0607e6329399f04846d42e20b4
     # via
     #   -c python/deplocks/base_extra_testdeps/ray-base_extra_testdeps_py3.11.lock
     #   -r docker/base-deps/requirements.in
@@ -560,9 +559,9 @@ fsspec==2023.12.1 \
     # via
     #   -c python/deplocks/base_extra_testdeps/ray-base_extra_testdeps_py3.11.lock
     #   adlfs
-google-api-core==2.24.2 \
-    --hash=sha256:810a63ac95f3c441b7c0e43d344e372887f62ce9071ba972eacf32672e072de9 \
-    --hash=sha256:81718493daf06d96d6bc76a91c23874dbf2fac0adbbf542831b805ee6e974696
+google-api-core==2.29.0 \
+    --hash=sha256:84181be0f8e6b04006df75ddfe728f24489f0af57c96a529ff7cf45bc28797f7 \
+    --hash=sha256:d30bc60980daa36e314b5d5a3e5958b0200cb44ca8fa1be2b614e932b75a3ea9
     # via
     #   -c python/deplocks/base_extra_testdeps/ray-base_extra_testdeps_py3.11.lock
     #   google-api-python-client
@@ -686,9 +685,9 @@ google-resumable-media==2.6.0 \
     # via
     #   -c python/deplocks/base_extra_testdeps/ray-base_extra_testdeps_py3.11.lock
     #   google-cloud-storage
-googleapis-common-protos==1.61.0 \
-    --hash=sha256:22f1915393bb3245343f6efe87f6fe868532efc12aa26b391b15132e1279f1c0 \
-    --hash=sha256:8a64866a97f6304a7179873a465d6eee97b7a24ec6cfd78e0f575e96b821240b
+googleapis-common-protos==1.72.0 \
+    --hash=sha256:4299c5a82d5ae1a9702ada957347726b167f9f8d1fc352477702a1e851ff4038 \
+    --hash=sha256:e55a601c1b32b52d7a3e65f43563e2aa61bcd737998ee672ac9b951cd49319f5
     # via
     #   -c python/deplocks/base_extra_testdeps/ray-base_extra_testdeps_py3.11.lock
     #   google-api-core

diff --git a/python/deplocks/base_deps/ray_base_deps_py3.12.lock b/python/deplocks/base_deps/ray_base_deps_py3.12.lock
@@ -466,9 +466,8 @@ cython==0.29.37 \
     # via
     #   -c python/deplocks/base_extra_testdeps/ray-base_extra_testdeps_py3.12.lock
     #   -r docker/base-deps/requirements.in
-flatbuffers==23.5.26 \
-    --hash=sha256:9ea1144cac05ce5d86e2859f431c6cd5e66cd9c78c558317c7955fb8d4c78d89 \
-    --hash=sha256:c0ff356da363087b915fde4b8b45bdda73432fc17cddb3c8157472eab1422ad1
+flatbuffers==25.12.19 \
+    --hash=sha256:7634f50c427838bb021c2d66a3d1168e9d199b0607e6329399f04846d42e20b4
     # via
     #   -c python/deplocks/base_extra_testdeps/ray-base_extra_testdeps_py3.12.lock
     #   -r docker/base-deps/requirements.in
@@ -560,9 +559,9 @@ fsspec==2023.12.1 \
     # via
     #   -c python/deplocks/base_extra_testdeps/ray-base_extra_testdeps_py3.12.lock
     #   adlfs
-google-api-core==2.24.2 \
-    --hash=sha256:810a63ac95f3c441b7c0e43d344e372887f62ce9071ba972eacf32672e072de9 \
-    --hash=sha256:81718493daf06d96d6bc76a91c23874dbf2fac0adbbf542831b805ee6e974696
+google-api-core==2.29.0 \
+    --hash=sha256:84181be0f8e6b04006df75ddfe728f24489f0af57c96a529ff7cf45bc28797f7 \
+    --hash=sha256:d30bc60980daa36e314b5d5a3e5958b0200cb44ca8fa1be2b614e932b75a3ea9
     # via
     #   -c python/deplocks/base_extra_testdeps/ray-base_extra_testdeps_py3.12.lock
     #   google-api-python-client
@@ -686,9 +685,9 @@ google-resumable-media==2.6.0 \
     # via
     #   -c python/deplocks/base_extra_testdeps/ray-base_extra_testdeps_py3.12.lock
     #   google-cloud-storage
-googleapis-common-protos==1.61.0 \
-    --hash=sha256:22f1915393bb3245343f6efe87f6fe868532efc12aa26b391b15132e1279f1c0 \
-    --hash=sha256:8a64866a97f6304a7179873a465d6eee97b7a24ec6cfd78e0f575e96b821240b
+googleapis-common-protos==1.72.0 \
+    --hash=sha256:4299c5a82d5ae1a9702ada957347726b167f9f8d1fc352477702a1e851ff4038 \
+    --hash=sha256:e55a601c1b32b52d7a3e65f43563e2aa61bcd737998ee672ac9b951cd49319f5
     # via
     #   -c python/deplocks/base_extra_testdeps/ray-base_extra_testdeps_py3.12.lock
     #   google-api-core