vllm-project · wangxiyuan · Jan 6, 2026 · Dec 23, 2025 · Dec 23, 2025 · Dec 25, 2025
@@ -47,8 +47,8 @@ def test_generate_task_and_state_flow(mock_adaptor):
         loader_obj.state = loader.ExpertWeightUpdateState.WAITING
 
         loader_obj.generate_expert_d2d_transfer_task([], [], {}, 0)
-        assert loader_obj.comm_op_list is None
-        assert loader_obj.state == loader.ExpertWeightUpdateState.WAITING
+        assert not loader_obj.comm_op_list
+        assert loader_obj.state == loader.ExpertWeightUpdateState.READY
 
 
 def test_asyn_transfer_and_update(mock_adaptor):

@@ -24,7 +24,7 @@ def setUp(self):
         self.moe_config.tp_size = 1
         self.moe_config.ep_size = 1
         self.moe_config.dp_group = MagicMock()
-        self.moe_config.num_global_redundant_experts = 0
+        self.moe_config.global_redundant_expert_num = 0
 
     @patch("vllm_ascend.ops.fused_moe.moe_comm_method.get_forward_context")
     @patch(

@@ -144,48 +144,11 @@ def test_get_combine_mc_kwargs_with_quant(self):
 
         self.dispatcher.need_extra_args = True
         self.dispatcher.enable_dispatch_v2 = True
-
+        self.dispatcher.moe_expert_num = len(expert_map)
         kwargs = self.dispatcher.get_combine_mc_kwargs(hidden_states,
                                                        context_metadata)
         self.assertIn("tp_send_counts", kwargs)
 
-    def test_token_combine_with_shared_experts(self):
-        shared_experts = MagicMock()
-        shared_experts.down_proj.return_value = (torch.randn(10, 128),
-                                                 torch.tensor(1.0))
-
-        topk_ids = torch.randint(0, 8, (10, 1))
-        topk_weights = torch.randn(10, 1)
-        expert_map = torch.tensor([0, 1, 2, 3, 4, 5, 6, 7])
-        ep_recv_counts = torch.tensor([0, 1, 2, 3, 4, 5, 6, 7])
-        assist_info_for_combine = torch.arange(10)
-        tp_recv_counts = torch.tensor([0, 1, 2, 3, 4, 5, 6, 7])
-
-        context_metadata = {
-            "topk_ids": topk_ids,
-            "topk_weights": topk_weights,
-            "expert_map": expert_map,
-            "ep_recv_counts": ep_recv_counts,
-            "mc2_mask": None,
-            "assist_info_for_combine": assist_info_for_combine,
-            "expand_scales": None,
-            "shared_experts": shared_experts,
-            "shared_act": torch.randn(10, 128),
-            "swiglu_out_scale": torch.randn(10, 1),
-            "tp_recv_counts": tp_recv_counts
-        }
-
-        self.dispatcher.with_quant = True
-        self.dispatcher.need_extra_args = True
-        self.dispatcher.enable_dispatch_v2 = True
-
-        hidden_states = torch.randn(10, 128)
-        with patch("torch_npu.npu_moe_distribute_combine_v2",
-                   return_value=torch.randn(10, 128)):
-            result = self.dispatcher.token_combine(hidden_states,
-                                                   context_metadata)
-            self.assertIsInstance(result, tuple)
-
 
 class TestTokenDispatcherWithAllGather(TestBase):
 

@@ -50,10 +50,6 @@ def generate_expert_d2d_transfer_task(self, expert_send_info,
             )
             return
 
-        # If neither send nor receive task is needed for this layer on this rank, return
-        if not (expert_send_info or expert_recv_info):
-            return
-
         self.updated_expert_map = updated_expert_map
 
         self.layer_id = layer_id

@@ -234,7 +234,7 @@ def __init__(self, *args, **kwargs):
 
         self.moe_config.num_experts = self.global_num_experts
         self.moe_config.num_local_experts = self.local_num_experts
-        self.moe_config.original_num_experts = num_experts
+        self.moe_config.global_redundant_expert_num = self.global_redundant_expert_num
 
         moe_quant_params = {
             "num_experts": local_num_experts,

@@ -105,7 +105,6 @@ def fused_experts(
             dynamic_scale_for_share: Optional[Any] = None,
             # For load balance
             log2phy: torch.Tensor = None,
-            global_redundant_expert_num: int = 0,
             need_trans: bool = False,
             dynamic_eplb: bool = False,
             mc2_mask: torch.Tensor = None,
@@ -124,7 +123,8 @@ def fused_experts(
             topk_ids=topk_ids,
             expert_map=expert_map,
             log2phy=log2phy,
-            global_redundant_expert_num=global_redundant_expert_num,
+            global_redundant_expert_num=self.moe_config.
+            global_redundant_expert_num,
             shared_experts=shared_experts,
             quantized_x_for_share=quantized_x_for_share,
             dynamic_scale_for_share=dynamic_scale_for_share,
@@ -283,7 +283,6 @@ def fused_experts(
             dynamic_scale_for_share: Optional[Any] = None,
             # For load balance
             log2phy: torch.Tensor = None,
-            global_redundant_expert_num: int = 0,
             need_trans: bool = False,
             dynamic_eplb: bool = False,
             mc2_mask: torch.Tensor = None,