13 changes: 6 additions & 7 deletions tests/ut/eplb/core/test_eplb_utils.py
@@ -34,15 +34,14 @@ def setUp(self):
         self.moe_config = moe_config
         self.mock_npu = patch("torch.Tensor.npu",
                               new=lambda self: self).start()
-        self.rank = 1

     def test_init_eplb_config_with_eplb(self):
         eplb_config = init_ascend_config(self.vllm_config).eplb_config
-        expert_map, log2phy, redundant_experts = init_eplb_config(
+        _, expert_map, log2phy, redundant_experts = init_eplb_config(
             eplb_config, 0, self.moe_config)
         gt_expert_map = torch.tensor([4, -1, -1, -1, 0, 1, 2, 3])
         gt_log2phy = torch.tensor([9, 1, 2, 3, 5, 6, 7, 8])
-        self.assertTrue(torch.equal(expert_map[self.rank], gt_expert_map))
+        self.assertTrue(torch.equal(expert_map, gt_expert_map))
         self.assertTrue(torch.equal(log2phy, gt_log2phy))
         self.assertEqual(redundant_experts, 2)

@@ -51,20 +50,20 @@ def test_init_eplb_config_with_eplb_withmap(self):
         self.vllm_config.additional_config["eplb_config"][
             "expert_map_path"] = _TEST_DIR + "/expert_map.json"
         eplb_config = init_ascend_config(self.vllm_config).eplb_config
-        expert_map, log2phy, redundant_experts = init_eplb_config(
+        _, expert_map, log2phy, redundant_experts = init_eplb_config(
             eplb_config, 0, self.moe_config)
         gt_expert_map = torch.tensor([-1, 1, 4, -1, 2, -1, 0, 3])
         gt_log2phy = torch.tensor([2, 6, 9, 3, 7, 4, 5, 8])
-        self.assertTrue(torch.equal(expert_map[self.rank], gt_expert_map))
+        self.assertTrue(torch.equal(expert_map, gt_expert_map))
         self.assertTrue(torch.equal(log2phy, gt_log2phy))
         self.assertEqual(redundant_experts, 2)

     def test_init_eplb_config_without_eplb(self):
         self.vllm_config.additional_config = {"refresh": True}
         eplb_config = init_ascend_config(self.vllm_config).eplb_config
-        expert_map, log2phy, redundant_experts = init_eplb_config(
+        _, expert_map, log2phy, redundant_experts = init_eplb_config(
             eplb_config, 0, self.moe_config)
         gt_expert_map = torch.tensor([-1, -1, -1, -1, 0, 1, 2, 3])
         print(expert_map, log2phy, redundant_experts)
-        self.assertTrue(torch.equal(expert_map[self.rank], gt_expert_map))
+        self.assertTrue(torch.equal(expert_map, gt_expert_map))
         self.assertEqual(redundant_experts, 0)
2 changes: 1 addition & 1 deletion vllm_ascend/eplb/adaptor/vllm_adaptor.py
@@ -188,7 +188,7 @@ def get_global_expert_map(self):
         all_layer_global_expert_map = []
         for layer_id in range(self.num_moe_layers):
             map_cpu = self.model.model.layers[
-                layer_id].mlp.experts.global_expert_map.cpu()
+                self.num_dense_layers + layer_id].mlp.experts.global_expert_map.cpu()
             all_layer_global_expert_map.append(map_cpu)
             self.expert_map_per_layer_cpu[self.num_dense_layers +
                                           layer_id] = map_cpu[self.rank_id]
6 changes: 4 additions & 2 deletions vllm_ascend/eplb/core/eplb_utils.py
@@ -81,18 +81,20 @@ def init_eplb_config(eplb_config, layer_id, moe_config):

     if ep_size == 1:
         assert not eplb_enable, "EPLB must used in expert parallelism."
-        return None, None, n_redundant
+        return None, None, None, n_redundant
     global_expert_map = []
     for rankid in range(ep_size):
         expert_map = torch.full((n_experts, ), -1, dtype=torch.int32)
         local_placement = global_placement[rankid]
         expert_map[local_placement] = torch.arange(local_placement.shape[0],
                                                    dtype=torch.int32)
         global_expert_map.append(expert_map)
+        if rankid == moe_config.ep_rank:
+            local_expert_map = expert_map.npu()
     log2phy = generate_log2phy_map(
         global_expert_map, moe_config.ep_rank).npu() if eplb_enable else None

-    return torch.stack(global_expert_map), log2phy, n_redundant
+    return torch.stack(global_expert_map), local_expert_map, log2phy, n_redundant


 def generate_log2phy_map(global_expert_map, ep_rank):
4 changes: 1 addition & 3 deletions vllm_ascend/ops/fused_moe/fused_moe.py
@@ -202,10 +202,8 @@ def __init__(self, *args, **kwargs):

         # init moe
         eplb_config = ascend_config.eplb_config
-        self.global_expert_map, self.log2phy, self.global_redundant_expert_num = init_eplb_config(
+        self.global_expert_map, self._expert_map, self.log2phy, self.global_redundant_expert_num = init_eplb_config(
             eplb_config, self.moe_instance_id, self.moe_config)
-        if self.global_expert_map is not None:
-            self._expert_map = self.global_expert_map[self.ep_rank].npu()
         self.global_num_experts = num_experts + self.global_redundant_expert_num
         self.dynamic_eplb = eplb_config.dynamic_eplb and (self.log2phy
                                                           is not None)
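Taken together, these hunks change init_eplb_config from a three-value to a four-value return: besides the stacked global map, the function now also hands back the current rank's expert map already moved to NPU, so callers no longer index global_expert_map[ep_rank] themselves. Below is a minimal sketch of the new caller-side unpacking, assuming eplb_config, moe_instance_id and moe_config are built the same way as in fused_moe.py above; it is illustrative only and not part of the PR.

    # Sketch only; the names follow the diff above and may differ in a real caller.
    global_expert_map, expert_map, log2phy, redundant_expert_num = init_eplb_config(
        eplb_config, moe_instance_id, moe_config)
    # global_expert_map:    stacked (ep_size, n_experts) map over all ranks, or None when ep_size == 1
    # expert_map:           this rank's map, already placed on NPU inside init_eplb_config (None when ep_size == 1)
    # log2phy:              logical-to-physical expert mapping when EPLB is enabled, otherwise None
    # redundant_expert_num: number of redundant experts (2 in the EPLB tests above, 0 without EPLB)

Callers that only need the per-rank map can discard the global one, as the updated unit tests do with the leading underscore in their unpacking.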