vllm-project · esmeetu · May 19, 2024 · May 19, 2024 · May 19, 2024 · May 19, 2024
diff --git a/vllm/distributed/device_communicators/custom_all_reduce.py b/vllm/distributed/device_communicators/custom_all_reduce.py
@@ -138,21 +138,10 @@ def __init__(self,
         else:
             device_ids = list(range(torch.cuda.device_count()))
 
-        physical_device_id = device_ids[device.index]
-        tensor = torch.tensor([physical_device_id],
-                              dtype=torch.int,
-                              device="cpu")
-        gather_list = [
-            torch.tensor([0], dtype=torch.int, device="cpu")
-            for _ in range(world_size)
-        ]
-        dist.all_gather(gather_list, tensor, group=self.group)
-        physical_device_ids = [t.item() for t in gather_list]
-
         # test nvlink first, this will filter out most of the cases
         # where custom allreduce is not supported
         # this checks hardware and driver support for NVLink
-        full_nvlink = _is_full_nvlink(physical_device_ids)
+        full_nvlink = _is_full_nvlink(device_ids)
         if world_size > 2 and not full_nvlink:
             logger.warning(
                 "Custom allreduce is disabled because it's not supported on"