diff --git a/.buildkite/test_areas/distributed.yaml b/.buildkite/test_areas/distributed.yaml index 64911983f5a8..a23f2a0efd67 100644 --- a/.buildkite/test_areas/distributed.yaml +++ b/.buildkite/test_areas/distributed.yaml @@ -67,6 +67,7 @@ steps: - tests/v1/distributed - tests/v1/engine/test_engine_core_client.py - tests/distributed/test_symm_mem_allreduce.py + - tests/distributed/test_multiproc_executor.py commands: # https://github.com/NVIDIA/nccl/issues/1838 - export NCCL_CUMEM_HOST_ENABLE=0 @@ -95,6 +96,8 @@ steps: - pytest -v -s distributed/test_pynccl.py - pytest -v -s distributed/test_events.py - pytest -v -s distributed/test_symm_mem_allreduce.py + # test multi-node TP with multiproc executor (simulated on single node) + - pytest -v -s distributed/test_multiproc_executor.py::test_multiproc_executor_multi_node # TODO: create a dedicated test section for multi-GPU example tests # when we have multiple distributed example tests # OLD rlhf examples diff --git a/tests/distributed/test_multiproc_executor.py b/tests/distributed/test_multiproc_executor.py index e741a79bc4ed..29d7f94c5102 100644 --- a/tests/distributed/test_multiproc_executor.py +++ b/tests/distributed/test_multiproc_executor.py @@ -9,11 +9,11 @@ import multiprocessing import os +import socket from tests.utils import multi_gpu_test from vllm.config import VllmConfig from vllm.engine.arg_utils import EngineArgs -from vllm.utils import get_open_port from vllm.v1.core.sched.output import SchedulerOutput from vllm.v1.executor.multiproc_executor import MultiprocExecutor @@ -333,7 +333,9 @@ def test_multiproc_executor_multi_node(): - Node 1 (rank 1): Uses GPUs 2,3 (CUDA_VISIBLE_DEVICES=2,3) with TP=2 Total world_size = 4, nnodes = 2 """ - port = get_open_port() + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(("", 0)) + port = s.getsockname()[1] # symm_mem does not work for simulating multi instance in single node os.environ["VLLM_ALLREDUCE_USE_SYMM_MEM"] = "0" diff --git a/vllm/v1/executor/multiproc_executor.py b/vllm/v1/executor/multiproc_executor.py index ec215d8e525b..c93719eba9e6 100644 --- a/vllm/v1/executor/multiproc_executor.py +++ b/vllm/v1/executor/multiproc_executor.py @@ -608,7 +608,6 @@ def __init__( ) # Load model - self._init_message_queues(input_shm_handle, vllm_config) is_eep_new_worker = envs.VLLM_ELASTIC_EP_SCALE_UP_LAUNCH if not is_eep_new_worker: self.worker.init_device() @@ -618,6 +617,10 @@ def __init__( ) self.worker.load_model() + # Initialize message queues after init_device() since multi-node setups + # (nnodes_within_dp > 1) require distributed groups to be initialized + self._init_message_queues(input_shm_handle, vllm_config) + # Enable environment variable cache (e.g. assume no more # environment variable overrides after this point) enable_envs_cache()