100 changes: 41 additions & 59 deletions tests/ut/eplb/core/test_eplb_utils.py
@@ -1,67 +1,49 @@
import os
import random
import sys
import unittest
from unittest.mock import patch

# isort: off
import pytest
import torch
from vllm.config import VllmConfig
from vllm.model_executor.layers.fused_moe.config import (FusedMoEConfig,
FusedMoEParallelConfig
)

from vllm_ascend.ascend_config import init_ascend_config
from vllm_ascend.eplb.core.eplb_utils import EPLBParamUtils, init_eplb_config
# isort: on


class TestAscendConfig(unittest.TestCase):

def setUp(self):
vllm_config = VllmConfig()
ascend_config = init_ascend_config(vllm_config)
ascend_config.dynamic_eplb = True
ascend_config.init_redundancy_expert = 2
moe_parallel_config = FusedMoEParallelConfig(2, 0, 1, 2, 1, 1, 1, 1,
True, "hccl")
moe_config = FusedMoEConfig(8, 8, 8192, 5, moe_parallel_config,
torch.float16)
moe_config.supports_eplb = True
self.ascend_config = ascend_config
self.moe_config = moe_config
self.mock_npu = patch("torch.Tensor.npu",
new=lambda self: self).start()

def test_init_eplb_config_with_eplb(self):
expert_map, log2phy, redundant_experts = init_eplb_config(
self.ascend_config, 0, self.moe_config)
gt_expert_map = torch.tensor([4, -1, -1, -1, 0, 1, 2, 3])
gt_log2phy = torch.tensor([9, 1, 2, 3, 5, 6, 7, 8])
self.assertTrue(torch.equal(expert_map, gt_expert_map))
self.assertTrue(torch.equal(log2phy, gt_log2phy))
self.assertEqual(redundant_experts, 2)

def test_init_eplb_config_with_eplb_withmap(self):
_TEST_DIR = os.path.dirname(__file__)
self.ascend_config.expert_map_path = _TEST_DIR + "/expert_map.json"
expert_map, log2phy, redundant_experts = init_eplb_config(
self.ascend_config, 0, self.moe_config)
gt_expert_map = torch.tensor([-1, 1, 4, -1, 2, -1, 0, 3])
gt_log2phy = torch.tensor([2, 6, 9, 3, 7, 4, 5, 8])
self.assertTrue(torch.equal(expert_map, gt_expert_map))
self.assertTrue(torch.equal(log2phy, gt_log2phy))
self.assertEqual(redundant_experts, 2)

def test_init_eplb_config_without_eplb(self):
self.ascend_config.dynamic_eplb = False
self.ascend_config.expert_map_path = None
expert_map, log2phy, redundant_experts = init_eplb_config(
self.ascend_config, 0, self.moe_config)
gt_expert_map = torch.tensor([-1, -1, -1, -1, 0, 1, 2, 3])
print(expert_map, log2phy, redundant_experts)
self.assertTrue(torch.equal(expert_map, gt_expert_map))
self.assertEqual(redundant_experts, 0)

from vllm_ascend.eplb.core import eplb_utils
from vllm_ascend.eplb.core.eplb_utils import EPLBParamUtils


def test_generate_log2phy_map_single_rank_holding():

expert_map = torch.tensor([[0, -1], [-1, 0]], dtype=torch.int32)
log2phy_map = eplb_utils.generate_log2phy_map(expert_map)

assert torch.all(log2phy_map[:, 0] == log2phy_map[0, 0])
assert torch.all(log2phy_map[:, 1] == log2phy_map[1, 1])


def test_generate_log2phy_map_multiple_rank_holding(monkeypatch):

expert_map = torch.tensor([[0], [0]], dtype=torch.int32)

monkeypatch.setattr(random, "choice", lambda x: x[0])

log2phy_map = eplb_utils.generate_log2phy_map(expert_map)

assert log2phy_map.shape == (2, 1)
assert (log2phy_map >= 0).all()


def test_determine_default_log2phy_map_world_size_1():
log2phy = eplb_utils.determine_default_log2phy_map(global_expert_num=3,
world_size=1,
rank_id=0)
assert log2phy.shape == (3, )
assert (log2phy >= 0).all()


def test_determine_default_log2phy_map_world_size_multiple():
log2phy = eplb_utils.determine_default_log2phy_map(global_expert_num=6,
world_size=2,
rank_id=1)
assert log2phy.shape == (6, )
assert (log2phy >= 0).all()


class TestEPLBParamUtils:
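Note: a minimal usage sketch of the helpers exercised by the tests above, assuming vllm_ascend and its eplb_utils module are importable in the environment; the expected shapes in the comments follow the assertions in the tests rather than any documented API contract.

import torch

from vllm_ascend.eplb.core import eplb_utils

# Per-rank expert map: rank 0 holds global expert 0, rank 1 holds global expert 1.
expert_map = torch.tensor([[0, -1], [-1, 0]], dtype=torch.int32)
log2phy = eplb_utils.generate_log2phy_map(expert_map)
# Per the tests: one physical id per (rank, logical expert), all >= 0.

# Default logical-to-physical mapping with no custom placement:
# 6 global experts spread over 2 ranks, viewed from rank 1.
default_log2phy = eplb_utils.determine_default_log2phy_map(global_expert_num=6,
                                                           world_size=2,
                                                           rank_id=1)
assert default_log2phy.shape == (6, )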
File renamed without changes.
140 changes: 140 additions & 0 deletions tests/ut/ops/test_expert_load_balancer.py
@@ -0,0 +1,140 @@
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#

import json
import os
from typing import List, TypedDict
from unittest import mock

import torch

from tests.ut.base import TestBase
from vllm_ascend.ops.expert_load_balancer import ExpertLoadBalancer


class Device(TypedDict):
device_id: int
device_expert: List[int]


class Layer(TypedDict):
layer_id: int
device_count: int
device_list: List[Device]


class MockData(TypedDict):
moe_layer_count: int
layer_list: List[Layer]


class TestExpertLoadBalancer(TestBase):

def setUp(self):
_TEST_DIR = os.path.dirname(__file__)
json_file = _TEST_DIR + "/expert_map.json"
with open(json_file, 'r') as f:
self.expert_map: MockData = json.load(f)

self.expert_load_balancer = ExpertLoadBalancer(json_file, 8)

def test_init(self):

self.assertIsInstance(self.expert_load_balancer.expert_map_tensor,
torch.Tensor)
self.assertEqual(self.expert_load_balancer.layers_num,
self.expert_map["moe_layer_count"])
self.assertEqual(self.expert_load_balancer.ranks_num,
self.expert_map["layer_list"][0]["device_count"])

def test_generate_index_dicts(self):
tensor_2d = torch.tensor([[7, 2, 0, 3, 5], [6, 1, 4, 7, 2]])
result = self.expert_load_balancer.generate_index_dicts(tensor_2d)
expected_result = [{
7: 0,
2: 1,
0: 2,
3: 3,
5: 4
}, {
6: 5,
1: 6,
4: 7,
7: 8,
2: 9
}]
self.assertEqual(result, expected_result)

def test_generate_expert_placement_map(self):
expert_placement_map = self.expert_load_balancer.generate_expert_placement_map(
)
self.assertEqual(expert_placement_map.shape,
(self.expert_load_balancer.layers_num,
self.expert_load_balancer.ranks_num, 10))
self.assertTrue(torch.all(expert_placement_map >= -1))

def test_generate_log2phy_expert_map(self):
layer_id = 0
log2phy_map = self.expert_load_balancer.generate_log2phy_expert_map(
layer_id)
self.assertEqual(log2phy_map.shape,
(self.expert_load_balancer.ranks_num, 10))
self.assertTrue(torch.all(log2phy_map >= -1))

@mock.patch("torch_npu.npu._lazy_init")
@mock.patch("torch.npu.current_device", return_value="cpu")
def test_get_rank_placement_map(self, mock_current_device, mock_lazy_init):
layer_id = 0
rank_id = 0
rank_local_expert_num, rank_expert_map = self.expert_load_balancer.get_rank_placement_map(
layer_id, rank_id)
self.assertEqual(rank_local_expert_num, 5)
expected_tensor = torch.tensor([2, -1, 1, 3, -1, 4, -1, 0, -1, -1],
dtype=torch.int32).to(
rank_expert_map.device)
self.assertTrue(rank_expert_map.equal(expected_tensor))

rank_id = 1
rank_local_expert_num, rank_expert_map = self.expert_load_balancer.get_rank_placement_map(
layer_id, rank_id)
expected_tensor = torch.tensor([-1, 1, 4, -1, 2, -1, 0, 3, -1, -1],
dtype=torch.int32).to(
rank_expert_map.device)
self.assertTrue(rank_expert_map.equal(expected_tensor))

def test_get_rank_log2phy_map(self):
layer_id = 0
rank_id = 0
log2phy_map = self.expert_load_balancer.get_rank_log2phy_map(
layer_id, rank_id)
expected_tensor = torch.tensor([2, 6, 1, 3, 7, 4, 5, 0, -1, -1],
dtype=torch.int32).to(
log2phy_map.device)
self.assertTrue(log2phy_map.equal(expected_tensor))

rank_id = 1
log2phy_map = self.expert_load_balancer.get_rank_log2phy_map(
layer_id, rank_id)
expected_tensor = torch.tensor([2, 6, 9, 3, 7, 4, 5, 8, -1, -1],
dtype=torch.int32).to(
log2phy_map.device)
self.assertTrue(log2phy_map.equal(expected_tensor))

def test_get_global_redundant_expert_num(self):
redundant_expert_num = self.expert_load_balancer.get_global_redundant_expert_num(
)
expected_redundant_expert_num = len(self.expert_map["layer_list"][0]["device_list"][0]["device_expert"]) * \
self.expert_map["layer_list"][0]["device_count"] - 8
self.assertEqual(redundant_expert_num, expected_redundant_expert_num)
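For reference, the fixture read in setUp (tests/ut/ops/expert_map.json) follows the MockData/Layer/Device TypedDicts declared above. A minimal illustrative payload with that structure could look like the dict below; the values are placeholders, not the checked-in fixture.

# Illustrative only: a payload matching the MockData/Layer/Device TypedDicts above.
example_expert_map = {
    "moe_layer_count": 1,
    "layer_list": [{
        "layer_id": 0,
        "device_count": 2,
        "device_list": [{
            "device_id": 0,
            "device_expert": [0, 1, 2, 3, 4],
        }, {
            "device_id": 1,
            "device_expert": [5, 6, 7, 0, 1],
        }],
    }],
}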
4 changes: 2 additions & 2 deletions tests/ut/ops/test_fused_moe.py
@@ -117,8 +117,8 @@ def mock_finalize(hidden_states, **kwargs):
enable_multistream_moe=False,
expert_map_path=None
)), \
patch('vllm_ascend.ops.fused_moe.fused_moe.init_eplb_config',
return_value=(torch.tensor([0, 1, 2, -1, -1, -1, -1, -1]), None, 0)), \
patch('vllm_ascend.ops.fused_moe.fused_moe.determine_expert_map',
return_value=(3, torch.tensor([0, 1, 2, -1, -1, -1, -1, -1]))), \
patch('vllm_ascend.ops.fused_moe.fused_moe.get_forward_context',
return_value=mock_forward_context_obj), \
patch('vllm_ascend.ops.fused_moe.prepare_finalize.get_forward_context',
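The updated mock above patches determine_expert_map to return a (local_expert_num, expert_map) pair, where expert_map maps each global expert id to a local index or -1. A hypothetical helper with that return shape, shown only to illustrate the contract the mock satisfies; this is not the vLLM implementation, and the real split across EP ranks may differ.

import torch

def naive_expert_map(ep_size: int, ep_rank: int, global_expert_num: int):
    # Contiguous split: each rank owns a slice of the global expert ids;
    # the last rank absorbs the remainder.
    base = global_expert_num // ep_size
    start = ep_rank * base
    end = global_expert_num if ep_rank == ep_size - 1 else start + base
    expert_map = torch.full((global_expert_num, ), -1, dtype=torch.int32)
    expert_map[start:end] = torch.arange(end - start, dtype=torch.int32)
    return end - start, expert_map

# e.g. naive_expert_map(ep_size=4, ep_rank=0, global_expert_num=8)
# -> (2, tensor([0, 1, -1, -1, -1, -1, -1, -1], dtype=torch.int32))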