Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions tests/e2e/multicard/2-cards/test_qwen3_moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,6 @@ def test_qwen3_moe_distributed_aiv_tp2():

@pytest.mark.asyncio
async def test_qwen3_moe_w8a8_distributed_tp2_ep_dynamic_eplb():
from vllm_ascend.utils import vllm_version_is

if not vllm_version_is("0.17.0"):
pytest.skip(
"EPLB output is different without EPLB, see issue: https://github.com/vllm-project/vllm-ascend/issues/7408",
)
model = "vllm-ascend/Qwen3-30B-A3B-W8A8"
port = get_open_port()
compilation_config = json.dumps({"cudagraph_capture_sizes": [8]})
Expand Down
5 changes: 4 additions & 1 deletion vllm_ascend/worker/model_runner_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,7 @@ def __init__(self, vllm_config: VllmConfig, device: torch.device):

eplb_config = self.ascend_config.eplb_config
self.dynamic_eplb = eplb_config.dynamic_eplb
self.eplb_enable = self.dynamic_eplb or (eplb_config.expert_map_path is not None)
if self.dynamic_eplb:
self.is_eplb_warmuped = False
self.policy_type = eplb_config.eplb_policy_type
Expand Down Expand Up @@ -2554,7 +2555,9 @@ def load_model(self) -> None:
logger.info("Starting to load model %s...", self.model_config.model)

with DeviceMemoryProfiler() as m: # noqa: SIM117
self.model = get_model(vllm_config=self.vllm_config)
if self.eplb_enable:
self.vllm_config.parallel_config.enable_eplb = True
self.model: nn.Module = get_model(vllm_config=self.vllm_config)
if self.dynamic_eplb:
model_register(self.model)
if self.drafter:
Expand Down
Loading