diff --git a/.buildkite/scripts/install-kv-connectors.sh b/.buildkite/scripts/install-kv-connectors.sh
new file mode 100755
index 000000000000..34c502e6b9a8
--- /dev/null
+++ b/.buildkite/scripts/install-kv-connectors.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+#
+# Installs the KV connector requirements, then keeps only the nixl CUDA wheel
+# variant that matches the CUDA major version of the installed torch build.
+#
+# Environment:
+#   KV_CONNECTORS_REQUIREMENTS  Optional path to the requirements file.
+#     Defaults to /vllm-workspace/requirements/kv_connectors.txt.
+
+set -euo pipefail
+
+REQUIREMENTS_FILE="${KV_CONNECTORS_REQUIREMENTS:-/vllm-workspace/requirements/kv_connectors.txt}"
+
+uv pip install --system -r "${REQUIREMENTS_FILE}"
+
+# Query the environment for torch's CUDA major version and the installed nixl
+# version. The snippet prints both on a single space-separated line.
+NIXL_METADATA=$(python3 - <<'PY'
+import importlib.metadata as metadata
+
+import torch
+
+cuda_version = torch.version.cuda
+if cuda_version is None:
+    raise SystemExit("torch.version.cuda is not set")
+
+print(cuda_version.split(".", 1)[0], metadata.version("nixl"))
+PY
+)
+read -r CUDA_MAJOR NIXL_VERSION <<<"${NIXL_METADATA}"
+
+# Fail early with a readable message instead of handing uv a malformed
+# requirement such as "nixl-cu==" when the output above is not "<int> <version>".
+if [[ ! "${CUDA_MAJOR}" =~ ^[0-9]+$ || ! "${NIXL_VERSION}" =~ ^[^[:space:]]+$ ]]; then
+  printf 'error: unexpected nixl metadata: %s\n' "${NIXL_METADATA}" >&2
+  exit 1
+fi
+
+# nixl>=1.1.0 can install multiple CUDA wheel variants. Keep only the variant
+# matching this CI image so nixl_ep_cpp links against the available libcudart.
+# The uninstall is best-effort: its stderr and exit status are deliberately
+# ignored (presumably because one or both variants may not be installed).
+uv pip uninstall --system nixl-cu12 nixl-cu13 2>/dev/null || true
+uv pip install --system --no-deps "nixl-cu${CUDA_MAJOR}==${NIXL_VERSION}"
+
+# Print the installed version (or "not installed") of each nixl distribution.
+python3 - <<'PY'
+import importlib.metadata as metadata
+
+for package_name in ("nixl", "nixl-cu12", "nixl-cu13"):
+    try:
+        version = metadata.version(package_name)
+    except metadata.PackageNotFoundError:
+        version = "not installed"
+    print(f"{package_name}: {version}")
+PY
diff --git a/.buildkite/test_areas/disaggregated.yaml b/.buildkite/test_areas/disaggregated.yaml
index d3e02be23981..c9d5237b67b5 100644
--- a/.buildkite/test_areas/disaggregated.yaml
+++ b/.buildkite/test_areas/disaggregated.yaml
@@ -11,7 +11,7 @@ steps:
   - vllm/distributed/kv_transfer/kv_connector/v1/nixl/
   - tests/v1/kv_connector/nixl_integration/
   commands:
-  - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
+  - bash /vllm-workspace/.buildkite/scripts/install-kv-connectors.sh
   - bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
 - label: Distributed FlashInfer NixlConnector PD accuracy (4 GPUs)
   key: distributed-flashinfer-nixlconnector-pd-accuracy-4-gpus
@@ -22,7 +22,7 @@ steps:
   - vllm/distributed/kv_transfer/kv_connector/v1/nixl/
   - tests/v1/kv_connector/nixl_integration/
   commands:
-  - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
+  - bash /vllm-workspace/.buildkite/scripts/install-kv-connectors.sh
   - FLASHINFER=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
 
 - label: DP EP Distributed NixlConnector PD accuracy tests (4 GPUs)
@@ -34,7 +34,7 @@ steps:
   - vllm/distributed/kv_transfer/kv_connector/v1/nixl/
   - tests/v1/kv_connector/nixl_integration/
   commands:
-  - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
+  - bash /vllm-workspace/.buildkite/scripts/install-kv-connectors.sh
   - DP_EP=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
 
 - label: CrossLayer KV layout Distributed NixlConnector PD accuracy tests (4 GPUs)
@@ -46,7 +46,7 @@ steps:
   - vllm/distributed/kv_transfer/kv_connector/v1/nixl/
   - tests/v1/kv_connector/nixl_integration/
   commands:
-  - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
+  - bash /vllm-workspace/.buildkite/scripts/install-kv-connectors.sh
   - CROSS_LAYERS_BLOCKS=True bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
 
 - label: Hybrid SSM NixlConnector PD accuracy tests (4 GPUs)
@@ -58,7 +58,7 @@ steps:
   - vllm/distributed/kv_transfer/kv_connector/v1/nixl/
   - tests/v1/kv_connector/nixl_integration/
   commands:
-  - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
+  - bash /vllm-workspace/.buildkite/scripts/install-kv-connectors.sh
   - HYBRID_SSM=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
 
 - label: MultiConnector (Nixl+Offloading) PD accuracy (2 GPUs)
@@ -73,7 +73,7 @@ steps:
   - vllm/distributed/kv_transfer/kv_connector/v1/offloading/
   - tests/v1/kv_connector/nixl_integration/
   commands:
-  - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
+  - bash /vllm-workspace/.buildkite/scripts/install-kv-connectors.sh
   - bash v1/kv_connector/nixl_integration/run_multi_connector_accuracy_test.sh
 
 - label: NixlConnector PD + Spec Decode acceptance (2 GPUs)
@@ -87,7 +87,7 @@ steps:
   - vllm/v1/worker/kv_connector_model_runner_mixin.py
   - tests/v1/kv_connector/nixl_integration/
   commands:
-  - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
+  - bash /vllm-workspace/.buildkite/scripts/install-kv-connectors.sh
   - bash v1/kv_connector/nixl_integration/config_sweep_spec_decode_test.sh
 
 - label: MultiConnector (Nixl+Offloading) PD edge cases (2 GPUs)
@@ -102,5 +102,5 @@ steps:
   - vllm/distributed/kv_transfer/kv_connector/v1/offloading/
   - tests/v1/kv_connector/nixl_integration/
   commands:
-  - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
-  - bash v1/kv_connector/nixl_integration/run_multi_connector_edge_case_test.sh
\ No newline at end of file
+  - bash /vllm-workspace/.buildkite/scripts/install-kv-connectors.sh
+  - bash v1/kv_connector/nixl_integration/run_multi_connector_edge_case_test.sh
diff --git a/.buildkite/test_areas/misc.yaml b/.buildkite/test_areas/misc.yaml
index ddeb692d831c..ce313060d8b7 100644
--- a/.buildkite/test_areas/misc.yaml
+++ b/.buildkite/test_areas/misc.yaml
@@ -86,7 +86,7 @@ steps:
   - tests/v1/metrics
   - tests/entrypoints/openai/correctness/test_lmeval.py
   commands:
-  - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
+  - bash /vllm-workspace/.buildkite/scripts/install-kv-connectors.sh
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
   # split the test to avoid interference
   - pytest -v -s -m 'not cpu_test' v1/core