@@ -43,19 +43,30 @@ def _compute_port_offset(self) -> int:
         Uses data_parallel_rank in the DP case; otherwise falls back to
         the replica rank assigned by Ray Serve (TP/PP case).
 
+        For TP/PP cases, multiply by num_devices (tp × pp) to reserve
+        sufficient port space, since each worker needs a unique port.
+        Each TP worker adds its tp_rank (0, 1, ..., tp_size - 1) to the
+        base port at bind time, and PP stages also need separate ports.
+
         Returns:
             Non-negative integer offset to add to a base port.
         """
         # Prefer explicit DP rank when available
         dp_rank = self.llm_config.engine_kwargs.get("data_parallel_rank")
         if isinstance(dp_rank, int) and dp_rank >= 0:
+            # vLLM already accounts for TP spacing in its DP offset
+            # calculation (data_parallel_rank × tp_size); don't multiply here
             return dp_rank
 
         # Fall back to Serve replica rank for TP/PP cases
         try:
             rc = serve.get_replica_context()
             if rc and hasattr(rc, "rank"):
-                return rc.rank
+                # Use num_devices (tp × pp) to reserve ports for all workers
+                # Each replica spawns num_devices workers, each needing a unique port
+                engine_config = self.llm_config.get_engine_config()
+                num_devices = engine_config.num_devices
+                return rc.rank * num_devices
         except Exception:
             # Best-effort fallback; avoid introducing failures in setup paths
             pass
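
For intuition, here is a minimal standalone sketch of the port layout this offset scheme produces. `BASE_PORT`, `port_offset`, and the example sizes are hypothetical stand-ins, not part of the patched code; only the arithmetic mirrors the method above.

```python
# Hypothetical sketch of the port layout produced by the offset scheme above.
# BASE_PORT and port_offset() are stand-ins; only the arithmetic mirrors
# _compute_port_offset.

BASE_PORT = 29_500  # hypothetical base port


def port_offset(dp_rank, replica_rank: int, num_devices: int) -> int:
    # Prefer the explicit DP rank: vLLM already spaces DP ranks by tp_size,
    # so the rank is returned as-is.
    if isinstance(dp_rank, int) and dp_rank >= 0:
        return dp_rank
    # TP/PP fallback: reserve num_devices (tp_size * pp_size) consecutive
    # ports per replica so every worker can bind a unique port.
    return replica_rank * num_devices


if __name__ == "__main__":
    # TP/PP case with tp_size=4, pp_size=2 -> num_devices=8.
    for replica in range(3):
        base = BASE_PORT + port_offset(None, replica, num_devices=8)
        print(f"replica {replica}: ports {base}..{base + 7}")
    # replica 0: ports 29500..29507
    # replica 1: ports 29508..29515
    # replica 2: ports 29516..29523
```

With the old `return rc.rank`, replicas 0 and 1 would have offset into overlapping ranges (29500 and 29501) while each still needed eight ports, which is the collision the multiplication avoids.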