diff --git a/vllm/executor/ray_distributed_executor.py b/vllm/executor/ray_distributed_executor.py
index f9e3f380a6a6..637d75b71291 100644
--- a/vllm/executor/ray_distributed_executor.py
+++ b/vllm/executor/ray_distributed_executor.py
@@ -141,6 +141,14 @@ def _init_workers_ray(self, placement_group: "PlacementGroup",
                           **ray_remote_kwargs):
         num_gpus = envs.VLLM_RAY_PER_WORKER_GPUS
 
+        def retain_envs(var_name):
+            retain_var_list = [
+                'GLOO_SOCKET_IFNAME', 'HCCL_SOCKET_IFNAME',
+                'NCCL_SOCKET_IFNAME'
+            ]
+            return ('HPU' in var_name or 'RAY' in var_name
+                    or 'VLLM' in var_name or var_name in retain_var_list)
+
         # The driver dummy worker does not actually use any resources.
         # It holds the resource for the driver worker.
         self.driver_dummy_worker: Optional[RayWorkerWrapper] = None
@@ -198,11 +206,16 @@ def _init_workers_ray(self, placement_group: "PlacementGroup",
                 )(RayWorkerWrapper).remote(vllm_config=self.vllm_config,
                                            rpc_rank=rank)
             else:
+                runtime_env_vars = {
+                    k: v
+                    for k, v in os.environ.items() if retain_envs(k)
+                }
                 worker = ray.remote(
                     num_cpus=0,
                     num_gpus=0,
                     resources={current_platform.ray_device_key: num_gpus},
                     scheduling_strategy=scheduling_strategy,
+                    runtime_env={"env_vars": runtime_env_vars},
                     **ray_remote_kwargs,
                 )(RayWorkerWrapper).remote(vllm_config=self.vllm_config,
                                            rpc_rank=rank)
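For context, a minimal standalone sketch of the Ray mechanism this patch relies on: ray.remote accepts a runtime_env whose "env_vars" entry is injected into the worker process's environment, so variables set on the driver but not otherwise inherited become visible to remote workers. The predicate and variable names below are illustrative assumptions, not part of the patch.

import os
import ray

# Illustrative variable, set before snapshotting the driver's environment.
os.environ['VLLM_EXAMPLE_FLAG'] = '1'

def retain_envs(var_name):
    # Same shape as the patch's predicate; the retained names here are
    # illustrative assumptions, not the patch's actual allowlist.
    return 'VLLM' in var_name or var_name == 'NCCL_SOCKET_IFNAME'

# Snapshot only the variables we want to propagate to workers.
env_vars = {k: v for k, v in os.environ.items() if retain_envs(k)}

ray.init()

@ray.remote(runtime_env={"env_vars": env_vars})
def worker_env():
    # Runs in a separate Ray worker process; runtime_env "env_vars"
    # guarantees the snapshotted variables are set here.
    return {k: v for k, v in os.environ.items() if retain_envs(k)}

print(ray.get(worker_env.remote()))  # includes VLLM_EXAMPLE_FLAG='1'

The same runtime_env argument works for actor classes, which is how the patch applies it to RayWorkerWrapper: the filtered copy of os.environ is built once per worker and handed to ray.remote alongside the scheduling options.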