Skip to content

Commit 10a9ab7

Browse files
authored
Fix error due to CustomAllreduce setup failure (#4815)
Signed-off-by: Kebe <[email protected]>
1 parent bb0fd74 commit 10a9ab7

File tree

1 file changed

+10
-4
lines changed

1 file changed

+10
-4
lines changed

python/sglang/srt/distributed/parallel_state.py

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -264,10 +264,16 @@ def __init__(
264 264
self.ca_comm: Optional[CustomAllreduce] = None
265 265
if use_custom_allreduce and self.world_size > 1:
266 266
# Initialize a custom fast all-reduce implementation.
267-
self.ca_comm = CustomAllreduce(
268-
group=self.cpu_group,
269-
device=self.device,
270-
)
267+
try:
268+
self.ca_comm = CustomAllreduce(
269+
group=self.cpu_group,
270+
device=self.device,
271+
)
272+
except Exception as e:
273+
logger.warning(
274+
f"Setup Custom allreduce failed with {e}. To silence this "
275+
"warning, specify --disable-custom-all-reduce explicitly."
276+
)
271 277

272 278
from sglang.srt.distributed.device_communicators.hpu_communicator import (
273 279
HpuCommunicator,

0 commit comments

Comments
 (0)