1 file changed: +5 −3 lines changed

@@ -1486,22 +1486,24 @@ def determine_n_share_experts_fusion(
         if self.n_share_experts_fusion > 0:
             # Only Deepseek V3/R1 can use shared experts fusion optimization now.
             if (
-                self.config.architectures[0] != architecture
+                not _is_cuda
+                or self.config.architectures[0] != architecture
                 or self.config.n_routed_experts != 256
             ):
                 self.n_share_experts_fusion = 0
                 global_server_args_dict["n_share_experts_fusion"] = 0
                 log_info_on_rank0(
                     logger,
-                    "Only Deepseek V3/R1 can use shared experts fusion optimization. Shared experts fusion optimization is disabled.",
+                    "Only Deepseek V3/R1 on NV-platform can use shared experts fusion optimization. Shared experts fusion optimization is disabled.",
                 )
             else:
                 assert (
                     self.n_share_experts_fusion == self.tp_size
                 ), f"Shared experts fusion optimization is enabled in DeepSeek V3/R1, set it to {self.tp_size} can get best optimized performace."
         elif self.n_share_experts_fusion == 0:
             if (
-                torch.cuda.get_device_capability("cuda") >= (9, 0)
+                _is_cuda
+                and torch.cuda.get_device_capability("cuda") >= (9, 0)
                 and self.config.architectures[0] == architecture
                 and self.config.n_routed_experts == 256
                 and (not global_server_args_dict["enable_deepep_moe"])
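Taken together, the two hunks gate shared-experts fusion on the CUDA platform: an explicit request is forced back to 0 when the backend is not CUDA, and the auto path now checks `_is_cuda` before probing `torch.cuda.get_device_capability`, so non-NVIDIA backends never hit that call. Below is a minimal standalone sketch of the resulting decision logic, under stated assumptions: `resolve_share_experts_fusion`, `Config`, `is_cuda`, and `device_capability` are hypothetical stand-ins rather than SGLang's actual API, and the auto branch returning `tp_size` is an assumption, since the hunk is truncated at that point.

```python
# Hedged sketch of the platform-gated fusion decision shown in the diff.
# All names here are hypothetical stand-ins for SGLang internals
# (`_is_cuda`, the model config, torch.cuda.get_device_capability).
from dataclasses import dataclass
from typing import Tuple


@dataclass
class Config:
    architecture: str = "DeepseekV3ForCausalLM"
    n_routed_experts: int = 256


def resolve_share_experts_fusion(
    requested: int,  # user-requested fusion degree; 0 means "auto"
    tp_size: int,
    cfg: Config,
    is_cuda: bool,
    device_capability: Tuple[int, int],
    enable_deepep_moe: bool,
    target_arch: str = "DeepseekV3ForCausalLM",
) -> int:
    """Return the effective n_share_experts_fusion after platform gating."""
    if requested > 0:
        # Explicit request: disable unless on CUDA with a Deepseek
        # V3/R1-shaped model (256 routed experts), mirroring the first hunk.
        if (
            not is_cuda
            or cfg.architecture != target_arch
            or cfg.n_routed_experts != 256
        ):
            return 0
        assert requested == tp_size, "best performance when fusion == tp_size"
        return requested
    # Auto mode: opt in only on CUDA, SM90+, a matching model, and no
    # DeepEP MoE, mirroring the second hunk. Returning tp_size here is an
    # assumption; the diff is truncated before the branch body.
    if (
        is_cuda
        and device_capability >= (9, 0)
        and cfg.architecture == target_arch
        and cfg.n_routed_experts == 256
        and not enable_deepep_moe
    ):
        return tp_size
    return 0


if __name__ == "__main__":
    # Off CUDA, an explicit request is forced back to 0.
    print(resolve_share_experts_fusion(8, 8, Config(), False, (9, 0), False))  # 0
    # On CUDA SM90 in auto mode, fusion defaults to tp_size.
    print(resolve_share_experts_fusion(0, 8, Config(), True, (9, 0), False))  # 8
```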