Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .buildkite/test_areas/distributed.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ steps:
- tests/v1/distributed
- tests/v1/engine/test_engine_core_client.py
- tests/distributed/test_symm_mem_allreduce.py
- tests/distributed/test_multiproc_executor.py
commands:
# https://github.com/NVIDIA/nccl/issues/1838
- export NCCL_CUMEM_HOST_ENABLE=0
Expand Down Expand Up @@ -95,6 +96,8 @@ steps:
- pytest -v -s distributed/test_pynccl.py
- pytest -v -s distributed/test_events.py
- pytest -v -s distributed/test_symm_mem_allreduce.py
# test multi-node TP with multiproc executor (simulated on single node)
- pytest -v -s distributed/test_multiproc_executor.py::test_multiproc_executor_multi_node
# TODO: create a dedicated test section for multi-GPU example tests
# when we have multiple distributed example tests
# OLD rlhf examples
Expand Down
6 changes: 4 additions & 2 deletions tests/distributed/test_multiproc_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@

import multiprocessing
import os
import socket

from tests.utils import multi_gpu_test
from vllm.config import VllmConfig
from vllm.engine.arg_utils import EngineArgs
from vllm.utils import get_open_port
from vllm.v1.core.sched.output import SchedulerOutput
from vllm.v1.executor.multiproc_executor import MultiprocExecutor

Expand Down Expand Up @@ -333,7 +333,9 @@ def test_multiproc_executor_multi_node():
- Node 1 (rank 1): Uses GPUs 2,3 (CUDA_VISIBLE_DEVICES=2,3) with TP=2
Total world_size = 4, nnodes = 2
"""
port = get_open_port()
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
s.bind(("", 0))
port = s.getsockname()[1]
    # symm_mem does not work when simulating multiple instances on a single node
os.environ["VLLM_ALLREDUCE_USE_SYMM_MEM"] = "0"

Expand Down
5 changes: 4 additions & 1 deletion vllm/v1/executor/multiproc_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -608,7 +608,6 @@ def __init__(
)

# Load model
self._init_message_queues(input_shm_handle, vllm_config)
is_eep_new_worker = envs.VLLM_ELASTIC_EP_SCALE_UP_LAUNCH
if not is_eep_new_worker:
self.worker.init_device()
Expand All @@ -618,6 +617,10 @@ def __init__(
)
self.worker.load_model()

# Initialize message queues after init_device() since multi-node setups
# (nnodes_within_dp > 1) require distributed groups to be initialized
self._init_message_queues(input_shm_handle, vllm_config)

# Enable environment variable cache (e.g. assume no more
# environment variable overrides after this point)
enable_envs_cache()
Expand Down