3 changes: 2 additions & 1 deletion tests/ut/eplb/core/test_eplb_utils.py
@@ -45,6 +45,7 @@ def setUp(self, mock_fix_incompatible_config):
self.vllm_config = vllm_config
self.moe_config = moe_config
self.mock_npu = patch("torch.Tensor.npu", new=lambda self: self).start()
os.environ["DYNAMIC_EPLB"] = "true"

def test_init_eplb_config_with_eplb(self):
eplb_config = init_ascend_config(self.vllm_config).eplb_config
@@ -71,6 +72,6 @@ def test_init_eplb_config_without_eplb(self):
eplb_config = init_ascend_config(self.vllm_config).eplb_config
_, expert_map, log2phy, redundant_experts = init_eplb_config(eplb_config, 0, self.moe_config)
gt_expert_map = torch.tensor([-1, -1, -1, -1, 0, 1, 2, 3])
print(expert_map, log2phy, redundant_experts)
self.assertIsNone(log2phy)
self.assertTrue(torch.equal(expert_map, gt_expert_map))
self.assertEqual(redundant_experts, 0)
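The new `setUp` line exports `DYNAMIC_EPLB` for the whole test process and never unsets it, so it can leak into later tests. A minimal sketch of a scoped alternative, assuming it runs inside a `unittest.TestCase.setUp` (the `addCleanup` pairing is shown only as a comment):

```python
import os
from unittest import mock

# Scoped equivalent of `os.environ["DYNAMIC_EPLB"] = "true"`: patch.dict
# records the original environment and restores it when the patch is stopped.
env_patcher = mock.patch.dict(os.environ, {"DYNAMIC_EPLB": "true"})
env_patcher.start()
# Inside TestCase.setUp this would typically be paired with:
#     self.addCleanup(env_patcher.stop)
```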
9 changes: 9 additions & 0 deletions vllm_ascend/ascend_config.py
@@ -385,6 +385,7 @@ def __getattr__(self, key):

def _validate_config(self):
if self.expert_map_path is not None:
logger.info(f"The expert_map is {self.config['dynamic_eplb']}")
if self.expert_map_path[-5:] != ".json":
raise TypeError("The expert_map is not json.")
if not os.path.exists(self.expert_map_path):
@@ -402,6 +403,14 @@ def _validate_config(self):
raise ValueError(f"{key} must greater than 0; got {self.config[key]} instead")
if self.eplb_policy_type not in [0, 1, 2, 3]:
raise ValueError("eplb_policy_type must in [0, 1, 2, 3]")
if self.config["dynamic_eplb"]:
assert (
os.getenv("DYNAMIC_EPLB", "false").lower() in ("true", "1")
or os.getenv("EXPERT_MAP_RECORD", "false") == "true"
), "The environment variable DYNAMIC_EPLB or EXPERT_MAP_RECORD of the ePLB must be set to true."

logger.info(f"Dynamic EPLB is {self.config['dynamic_eplb']}")
logger.info(f"The number of redundant experts is {self.config['num_redundant_experts']}")


_ASCEND_CONFIG: AscendConfig | None = None
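For reference, the environment gate added to `_validate_config` can be exercised in isolation; a minimal sketch, where the helper name `dynamic_eplb_env_enabled` is illustrative rather than part of the module:

```python
import os


def dynamic_eplb_env_enabled() -> bool:
    # Same condition as the new assert: DYNAMIC_EPLB accepts "true"/"1"
    # case-insensitively, while EXPERT_MAP_RECORD must be exactly "true".
    return (
        os.getenv("DYNAMIC_EPLB", "false").lower() in ("true", "1")
        or os.getenv("EXPERT_MAP_RECORD", "false") == "true"
    )


os.environ["DYNAMIC_EPLB"] = "1"
assert dynamic_eplb_env_enabled()
```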
36 changes: 0 additions & 36 deletions vllm_ascend/eplb/adaptor/abstract_adaptor.py

This file was deleted.

4 changes: 1 addition & 3 deletions vllm_ascend/eplb/adaptor/vllm_adaptor.py
@@ -22,10 +22,8 @@
import torch.distributed as dist
from vllm.logger import logger

from vllm_ascend.eplb.adaptor.abstract_adaptor import EplbAdaptor


class VllmEplbAdaptor(EplbAdaptor):
class VllmEplbAdaptor:
def __init__(self, model, **args):
super().__init__(**args)
self.model = model
38 changes: 8 additions & 30 deletions vllm_ascend/eplb/utils.py
@@ -28,47 +28,25 @@ def get_log2phy_map(self, layer_id):
return self.model.layers[layer_id].mlp.experts.get_log2phy_map()


def get_all_expert_map(self, num_moe_layers):
all_loads = []
num_dense_layers = self.num_dense_layers if hasattr(self, "num_dense_layers") else 0
for layer_id in range(num_moe_layers):
load_tensor = self.get_expert_map(layer_id + num_dense_layers) # (num_experts_per_layer,)
all_loads.append(load_tensor)

return torch.stack(all_loads, dim=0)


def get_all_moe_loads(self):
num_dense_layers = self.num_dense_layers if hasattr(self, "num_dense_layers") else 0
num_dense_layers = getattr(self.model.config, "first_k_dense_replace", 0)
num_layers = self.model.config.num_hidden_layers
all_moe_loads = torch.stack(
[
self.model.layers[layer_id + num_dense_layers].mlp.experts.moe_load
for layer_id in range(self.num_moe_layers)
],
[self.model.layers[layer_id].mlp.experts.moe_load for layer_id in range(num_dense_layers, num_layers)],
dim=0,
)
return all_moe_loads


def clear_all_moe_loads(self):
num_dense_layers = self.num_dense_layers if hasattr(self, "num_dense_layers") else 0
for layer_id in range(self.num_moe_layers):
self.model.layers[layer_id + num_dense_layers].mlp.experts.clear_moe_load()
num_dense_layers = getattr(self.model.config, "first_k_dense_replace", 0)
num_layers = self.model.config.num_hidden_layers
for layer_id in range(num_dense_layers, num_layers):
self.model.layers[layer_id].mlp.experts.clear_moe_load()


def model_register(model, model_config):
def model_register(model):
model.get_expert_map = types.MethodType(get_expert_map, model)
model.get_log2phy_map = types.MethodType(get_log2phy_map, model)
model.get_all_expert_map = types.MethodType(get_all_expert_map, model)
model.get_all_moe_loads = types.MethodType(get_all_moe_loads, model)
model.clear_all_moe_loads = types.MethodType(clear_all_moe_loads, model)

config = model_config.hf_text_config

if config.model_type == "qwen3_moe":
model.num_moe_layers = config.num_hidden_layers
elif config.model_type == "deepseek_v2" or config.model_type == "deepseek_v3":
model.num_dense_layers = config.first_k_dense_replace
model.num_moe_layers = config.num_hidden_layers - model.num_dense_layers
else:
raise NotImplementedError("EPLB is not supported.")
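The refactor drops the per-model-type branching in `model_register` and instead derives the MoE layer range from `first_k_dense_replace` at call time. A self-contained sketch of that pattern against a mocked model (the `SimpleNamespace` stand-ins are assumptions, not the real vLLM classes):

```python
import types
from types import SimpleNamespace

import torch


class _Experts:
    """Tiny stand-in for an MoE experts module (illustrative only)."""

    def __init__(self, load: torch.Tensor):
        self.moe_load = load

    def clear_moe_load(self):
        self.moe_load.zero_()


def get_all_moe_loads(self):
    # Same layer-range derivation as the refactored helper: the dense prefix of
    # first_k_dense_replace layers is skipped, every remaining layer is MoE.
    num_dense_layers = getattr(self.model.config, "first_k_dense_replace", 0)
    num_layers = self.model.config.num_hidden_layers
    return torch.stack(
        [self.model.layers[i].mlp.experts.moe_load for i in range(num_dense_layers, num_layers)],
        dim=0,
    )


# Mocked model: one dense layer followed by three MoE layers, four experts each.
config = SimpleNamespace(first_k_dense_replace=1, num_hidden_layers=4)
layers = [SimpleNamespace(mlp=SimpleNamespace(experts=_Experts(torch.ones(4)))) for _ in range(4)]
inner = SimpleNamespace(config=config, layers=layers)

# The helpers reach the transformer through `.model`, so they are bound to an
# outer wrapper with types.MethodType, mirroring what model_register does.
wrapper = SimpleNamespace(model=inner)
wrapper.get_all_moe_loads = types.MethodType(get_all_moe_loads, wrapper)
print(wrapper.get_all_moe_loads().shape)  # torch.Size([3, 4])
```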
2 changes: 1 addition & 1 deletion vllm_ascend/worker/model_runner_v1.py
@@ -2308,7 +2308,7 @@ def load_model(self) -> None:
with DeviceMemoryProfiler() as m: # noqa: SIM117
self.model = get_model(vllm_config=self.vllm_config)
if self.dynamic_eplb:
model_register(self.model, self.model_config)
model_register(self.model)
if self.drafter:
logger.info("Loading drafter model...")
with get_tp_context(self.drafter):