diff --git a/vllm_ascend/ops/fused_moe/moe_comm_method.py b/vllm_ascend/ops/fused_moe/moe_comm_method.py index 41cad9af483..458557e97ca 100644 --- a/vllm_ascend/ops/fused_moe/moe_comm_method.py +++ b/vllm_ascend/ops/fused_moe/moe_comm_method.py @@ -343,7 +343,7 @@ def fused_experts( ep_rank_size=self.token_dispatcher.ep_world_size, ep_rank_id=self.token_dispatcher.ep_rank_id, moe_expert_num=self.moe_config.num_experts, - global_bs=self.token_dispatcher.fused_global_bs) + global_bs=self.token_dispatcher.global_bs) else: raise ValueError( f"Wrong value of {envs_ascend.VLLM_ASCEND_ENABLE_FUSED_MC2=}") diff --git a/vllm_ascend/ops/fused_moe/token_dispatcher.py b/vllm_ascend/ops/fused_moe/token_dispatcher.py index b046e9536a9..a783da197e5 100644 --- a/vllm_ascend/ops/fused_moe/token_dispatcher.py +++ b/vllm_ascend/ops/fused_moe/token_dispatcher.py @@ -137,7 +137,6 @@ def __init__(self, **kwargs): max_num_tokens = min(max_num_reqs * uniform_decode_query_len, 512) num_tokens_per_tp_rank = (max_num_tokens + tp_size - 1) // tp_size self.global_bs = num_tokens_per_tp_rank * self.ep_world_size - self.fused_global_bs = max_num_tokens * self.ep_world_size def get_dispatch_mc2_kwargs( self,