Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 1 addition & 8 deletions .buildkite/test_areas/disaggregated.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# We assume uv pip install -r requirements/kv_connectors.txt is run in the image-build step.
group: Disaggregated
depends_on:
- image-build
Expand All @@ -11,7 +12,6 @@ steps:
- vllm/distributed/kv_transfer/kv_connector/v1/nixl/
- tests/v1/kv_connector/nixl_integration/
commands:
- uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
- bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
- label: Distributed FlashInfer NixlConnector PD accuracy (4 GPUs)
key: distributed-flashinfer-nixlconnector-pd-accuracy-4-gpus
Expand All @@ -22,7 +22,6 @@ steps:
- vllm/distributed/kv_transfer/kv_connector/v1/nixl/
- tests/v1/kv_connector/nixl_integration/
commands:
- uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
- FLASHINFER=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh

- label: DP EP Distributed NixlConnector PD accuracy tests (4 GPUs)
Expand All @@ -34,7 +33,6 @@ steps:
- vllm/distributed/kv_transfer/kv_connector/v1/nixl/
- tests/v1/kv_connector/nixl_integration/
commands:
- uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
- DP_EP=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh

- label: CrossLayer KV layout Distributed NixlConnector PD accuracy tests (4 GPUs)
Expand All @@ -46,7 +44,6 @@ steps:
- vllm/distributed/kv_transfer/kv_connector/v1/nixl/
- tests/v1/kv_connector/nixl_integration/
commands:
- uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
- CROSS_LAYERS_BLOCKS=True bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh

- label: Hybrid SSM NixlConnector PD accuracy tests (4 GPUs)
Expand All @@ -58,7 +55,6 @@ steps:
- vllm/distributed/kv_transfer/kv_connector/v1/nixl/
- tests/v1/kv_connector/nixl_integration/
commands:
- uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
- HYBRID_SSM=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh

- label: MultiConnector (Nixl+Offloading) PD accuracy (2 GPUs)
Expand All @@ -73,7 +69,6 @@ steps:
- vllm/distributed/kv_transfer/kv_connector/v1/offloading/
- tests/v1/kv_connector/nixl_integration/
commands:
- uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
- bash v1/kv_connector/nixl_integration/run_multi_connector_accuracy_test.sh

- label: NixlConnector PD + Spec Decode acceptance (2 GPUs)
Expand All @@ -87,7 +82,6 @@ steps:
- vllm/v1/worker/kv_connector_model_runner_mixin.py
- tests/v1/kv_connector/nixl_integration/
commands:
- uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
- bash v1/kv_connector/nixl_integration/config_sweep_spec_decode_test.sh

- label: MultiConnector (Nixl+Offloading) PD edge cases (2 GPUs)
Expand All @@ -102,5 +96,4 @@ steps:
- vllm/distributed/kv_transfer/kv_connector/v1/offloading/
- tests/v1/kv_connector/nixl_integration/
commands:
- uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
- bash v1/kv_connector/nixl_integration/run_multi_connector_edge_case_test.sh
36 changes: 34 additions & 2 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -887,6 +887,31 @@ RUN apt-get update -y \
# We can specify the standard or nightly build of PyTorch
ARG PYTORCH_NIGHTLY

# install kv_connectors if requested (same logic as vllm-openai-base)
ARG INSTALL_KV_CONNECTORS=false
ARG CUDA_VERSION
ARG torch_cuda_arch_list='7.5 8.0 8.6 8.9 9.0 10.0 11.0 12.0+PTX'
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
RUN --mount=type=cache,target=/opt/uv/cache \
--mount=type=bind,source=requirements/kv_connectors.txt,target=/tmp/kv_connectors.txt,ro \
CUDA_MAJOR="${CUDA_VERSION%%.*}"; \
CUDA_VERSION_DASH=$(echo $CUDA_VERSION | cut -d. -f1,2 | tr '.' '-'); \
CUDA_HOME=/usr/local/cuda; \
BUILD_PKGS="libcusparse-dev-${CUDA_VERSION_DASH} \
libcublas-dev-${CUDA_VERSION_DASH} \
libcusolver-dev-${CUDA_VERSION_DASH}"; \
if [ "$INSTALL_KV_CONNECTORS" = "true" ]; then \
uv pip install --system -r /tmp/kv_connectors.txt --no-build || ( \
apt-get update -y && \
apt-get install -y --no-install-recommends --allow-change-held-packages ${BUILD_PKGS} && \
uv pip install --system -r /tmp/kv_connectors.txt --no-build-isolation && \
apt-get purge -y ${BUILD_PKGS} && \
rm -rf /var/lib/apt/lists/* \
); \
uv pip uninstall --system nixl-cu12 nixl-cu13 2>/dev/null || true; \
uv pip install --system --no-deps nixl-cu${CUDA_MAJOR}; \
fi

# Install development dependencies (for testing)
COPY requirements/lint.txt requirements/lint.txt
COPY requirements/test/cuda.in requirements/test/cuda.in
Expand Down Expand Up @@ -922,6 +947,10 @@ RUN --mount=type=cache,target=/opt/uv/cache \
RUN --mount=type=cache,target=/opt/uv/cache \
uv pip install --system -e tests/vllm_test_utils

# Prevent early CUDA initialization when CUDA-dependent packages (cupy, nixl)
# are installed — tests that fork subprocesses would otherwise fail.
ENV CUDA_MODULE_LOADING=LAZY

# enable fast downloads from hf (for testing)
ENV HF_XET_HIGH_PERFORMANCE 1

Expand All @@ -936,6 +965,7 @@ COPY vllm/v1 /usr/local/lib/python${PYTHON_VERSION}/dist-packages/vllm/v1
# will not be imported by other tests
RUN mkdir src
RUN mv vllm src/vllm

#################### TEST IMAGE ####################

#################### OPENAI API SERVER ####################
Expand Down Expand Up @@ -978,8 +1008,10 @@ RUN --mount=type=cache,target=/opt/uv/cache \
# clean up -dev packages, keep runtime libraries
rm -rf /var/lib/apt/lists/* \
); \
# Force-reinstall the matching CUDA wheel so the correct nixl_ep_cpp.so is installed.
uv pip install --system --force-reinstall --no-deps nixl-cu${CUDA_MAJOR}; \
# Remove all nixl-cu* variants then install only the one matching this
# image's CUDA (nixl>=1.1.0 installs both)
uv pip uninstall --system nixl-cu12 nixl-cu13 2>/dev/null || true; \
uv pip install --system --no-deps nixl-cu${CUDA_MAJOR}; \
fi

# Optional override: install mooncake-transfer-engine from a URL instead of the
Expand Down
15 changes: 8 additions & 7 deletions docker/docker-bake.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,14 @@ target "_common" {
dockerfile = "docker/Dockerfile"
context = "."
args = {
max_jobs = MAX_JOBS
nvcc_threads = NVCC_THREADS
torch_cuda_arch_list = TORCH_CUDA_ARCH_LIST
VLLM_BUILD_COMMIT = VLLM_BUILD_COMMIT != "unknown" ? VLLM_BUILD_COMMIT : (COMMIT != "" ? COMMIT : "unknown")
VLLM_BUILD_PIPELINE = VLLM_BUILD_PIPELINE
VLLM_BUILD_URL = VLLM_BUILD_URL
VLLM_IMAGE_TAG = VLLM_IMAGE_TAG
max_jobs = MAX_JOBS
nvcc_threads = NVCC_THREADS
torch_cuda_arch_list = TORCH_CUDA_ARCH_LIST
INSTALL_KV_CONNECTORS = "true"
VLLM_BUILD_COMMIT = VLLM_BUILD_COMMIT != "unknown" ? VLLM_BUILD_COMMIT : (COMMIT != "" ? COMMIT : "unknown")
VLLM_BUILD_PIPELINE = VLLM_BUILD_PIPELINE
VLLM_BUILD_URL = VLLM_BUILD_URL
VLLM_IMAGE_TAG = VLLM_IMAGE_TAG
}
}

Expand Down
Loading