From 54e5a300e0f321efd15f1a22941121bd8a7685d2 Mon Sep 17 00:00:00 2001 From: lhchg Date: Mon, 12 Jan 2026 10:30:39 +0800 Subject: [PATCH 1/3] enable ep32 for dispatch_ffn_combine Signed-off-by: lhchg --- vllm_ascend/ascend_forward_context.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm_ascend/ascend_forward_context.py b/vllm_ascend/ascend_forward_context.py index 06f5df1db17..8243cca46b6 100644 --- a/vllm_ascend/ascend_forward_context.py +++ b/vllm_ascend/ascend_forward_context.py @@ -244,7 +244,7 @@ def select_moe_comm_method(num_tokens: int, # TODO: drop the EP-size guard when dispatch_ffn_combine supports larger EP sizes # TODO: drop speculative method guard when dispatch_gmm_combine_decode supports w16a16 fused_mc2_enable = envs_ascend.VLLM_ASCEND_ENABLE_FUSED_MC2 and quant_type == "w8a8_dynamic" - dispatch_ffn_combine_enable = get_ep_group().world_size <= 16 and ( + dispatch_ffn_combine_enable = get_ep_group().world_size <= 32 and ( not is_draft_model) and (not dynamic_eplb) if num_tokens <= mc2_tokens_capacity: fused_decode_enable = fused_mc2_enable From 1cbfd2f4ed5aca8cfc6f6500afa39526d5e15d92 Mon Sep 17 00:00:00 2001 From: lhchg Date: Mon, 12 Jan 2026 16:09:47 +0800 Subject: [PATCH 2/3] change note Signed-off-by: lhchg --- vllm_ascend/envs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm_ascend/envs.py b/vllm_ascend/envs.py index 2c1fae149fa..843b862c26c 100644 --- a/vllm_ascend/envs.py +++ b/vllm_ascend/envs.py @@ -1,4 +1,4 @@ -# +f # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. # This file is a part of the vllm-ascend project. # @@ -123,7 +123,7 @@ # Whether to enable fused mc2(`dispatch_gmm_combine_decode`/`dispatch_ffn_combine` operator) # 0, or not set: default ALLTOALL and MC2 will be used. # 1: ALLTOALL and MC2 might be replaced by `dispatch_ffn_combine` operator. - # `dispatch_ffn_combine` can be used only for moe layer with W8A8, EP<=16, non-mtp, non-dynamic-eplb. 
+ # `dispatch_ffn_combine` can be used only for MoE layers with W8A8, EP<=32, non-mtp, non-dynamic-eplb. # 2: MC2 might be replaced by `dispatch_gmm_combine_decode` operator. # `dispatch_gmm_combine_decode` can be used only for **decode node** moe layer # with W8A8. And MTP layer must be W8A8. From bbd1d8e5ef9c8706eedbdb71ba0c50f07d2f4420 Mon Sep 17 00:00:00 2001 From: lhchg Date: Mon, 12 Jan 2026 16:22:45 +0800 Subject: [PATCH 3/3] remove stray character from license header Signed-off-by: lhchg --- vllm_ascend/envs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm_ascend/envs.py b/vllm_ascend/envs.py index 843b862c26c..bc31abd1179 100644 --- a/vllm_ascend/envs.py +++ b/vllm_ascend/envs.py @@ -1,4 +1,4 @@ -f +# # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. # This file is a part of the vllm-ascend project. #