13 changes: 6 additions & 7 deletions tests/ut/eplb/core/test_eplb_utils.py
@@ -34,15 +34,14 @@ def setUp(self):
         self.moe_config = moe_config
         self.mock_npu = patch("torch.Tensor.npu",
                               new=lambda self: self).start()
-        self.rank = 1

     def test_init_eplb_config_with_eplb(self):
         eplb_config = init_ascend_config(self.vllm_config).eplb_config
-        expert_map, log2phy, redundant_experts = init_eplb_config(
+        _, expert_map, log2phy, redundant_experts = init_eplb_config(
             eplb_config, 0, self.moe_config)
         gt_expert_map = torch.tensor([4, -1, -1, -1, 0, 1, 2, 3])
         gt_log2phy = torch.tensor([9, 1, 2, 3, 5, 6, 7, 8])
-        self.assertTrue(torch.equal(expert_map[self.rank], gt_expert_map))
+        self.assertTrue(torch.equal(expert_map, gt_expert_map))
         self.assertTrue(torch.equal(log2phy, gt_log2phy))
         self.assertEqual(redundant_experts, 2)

@@ -51,20 +50,20 @@ def test_init_eplb_config_with_eplb_withmap(self):
         self.vllm_config.additional_config["eplb_config"][
             "expert_map_path"] = _TEST_DIR + "/expert_map.json"
         eplb_config = init_ascend_config(self.vllm_config).eplb_config
-        expert_map, log2phy, redundant_experts = init_eplb_config(
+        _, expert_map, log2phy, redundant_experts = init_eplb_config(
             eplb_config, 0, self.moe_config)
         gt_expert_map = torch.tensor([-1, 1, 4, -1, 2, -1, 0, 3])
         gt_log2phy = torch.tensor([2, 6, 9, 3, 7, 4, 5, 8])
-        self.assertTrue(torch.equal(expert_map[self.rank], gt_expert_map))
+        self.assertTrue(torch.equal(expert_map, gt_expert_map))
         self.assertTrue(torch.equal(log2phy, gt_log2phy))
         self.assertEqual(redundant_experts, 2)

     def test_init_eplb_config_without_eplb(self):
         self.vllm_config.additional_config = {"refresh": True}
         eplb_config = init_ascend_config(self.vllm_config).eplb_config
-        expert_map, log2phy, redundant_experts = init_eplb_config(
+        _, expert_map, log2phy, redundant_experts = init_eplb_config(
             eplb_config, 0, self.moe_config)
         gt_expert_map = torch.tensor([-1, -1, -1, -1, 0, 1, 2, 3])
         print(expert_map, log2phy, redundant_experts)
-        self.assertTrue(torch.equal(expert_map[self.rank], gt_expert_map))
+        self.assertTrue(torch.equal(expert_map, gt_expert_map))
         self.assertEqual(redundant_experts, 0)
2 changes: 1 addition & 1 deletion vllm_ascend/eplb/adaptor/vllm_adaptor.py
@@ -188,7 +188,7 @@ def get_global_expert_map(self):
         all_layer_global_expert_map = []
         for layer_id in range(self.num_moe_layers):
             map_cpu = self.model.model.layers[
-                layer_id].mlp.experts.global_expert_map.cpu()
+                self.num_dense_layers + layer_id].mlp.experts.global_expert_map.cpu()
             all_layer_global_expert_map.append(map_cpu)
             self.expert_map_per_layer_cpu[self.num_dense_layers +
                                           layer_id] = map_cpu[self.rank_id]
6 changes: 4 additions & 2 deletions vllm_ascend/eplb/core/eplb_utils.py
@@ -81,18 +81,20 @@ def init_eplb_config(eplb_config, layer_id, moe_config):

     if ep_size == 1:
         assert not eplb_enable, "EPLB must used in expert parallelism."
-        return None, None, n_redundant
+        return None, None, None, n_redundant
     global_expert_map = []
     for rankid in range(ep_size):
         expert_map = torch.full((n_experts, ), -1, dtype=torch.int32)
         local_placement = global_placement[rankid]
         expert_map[local_placement] = torch.arange(local_placement.shape[0],
                                                    dtype=torch.int32)
         global_expert_map.append(expert_map)
+        if rankid == moe_config.ep_rank:
+            local_expert_map = expert_map.npu()
     log2phy = generate_log2phy_map(
         global_expert_map, moe_config.ep_rank).npu() if eplb_enable else None

-    return torch.stack(global_expert_map), log2phy, n_redundant
+    return torch.stack(global_expert_map), local_expert_map, log2phy, n_redundant


 def generate_log2phy_map(global_expert_map, ep_rank):
4 changes: 1 addition & 3 deletions vllm_ascend/ops/fused_moe/fused_moe.py
@@ -202,10 +202,8 @@ def __init__(self, *args, **kwargs):

         # init moe
         eplb_config = ascend_config.eplb_config
-        self.global_expert_map, self.log2phy, self.global_redundant_expert_num = init_eplb_config(
+        self.global_expert_map, self._expert_map, self.log2phy, self.global_redundant_expert_num = init_eplb_config(
             eplb_config, self.moe_instance_id, self.moe_config)
-        if self.global_expert_map is not None:
-            self._expert_map = self.global_expert_map[self.ep_rank].npu()
         self.global_num_experts = num_experts + self.global_redundant_expert_num
         self.dynamic_eplb = eplb_config.dynamic_eplb and (self.log2phy
                                                           is not None)
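Taken together, these hunks change init_eplb_config from a three-value to a four-value return: besides the stacked global map, the function now also hands back the current rank's expert map already moved to NPU, so callers no longer index global_expert_map[ep_rank] themselves. Below is a minimal sketch of the new caller-side unpacking, assuming eplb_config, moe_instance_id and moe_config are built the same way as in fused_moe.py above; it is illustrative only and not part of the PR.

    # Sketch only; the names follow the diff above and may differ in a real caller.
    global_expert_map, expert_map, log2phy, redundant_expert_num = init_eplb_config(
        eplb_config, moe_instance_id, moe_config)
    # global_expert_map:    stacked (ep_size, n_experts) map over all ranks, or None when ep_size == 1
    # expert_map:           this rank's map, already placed on NPU inside init_eplb_config (None when ep_size == 1)
    # log2phy:              logical-to-physical expert mapping when EPLB is enabled, otherwise None
    # redundant_expert_num: number of redundant experts (2 in the EPLB tests above, 0 without EPLB)

Callers that only need the per-rank map can discard the global one, as the updated unit tests do with the leading underscore in their unpacking.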