diff --git a/vllm_ascend/eplb/eplb_updator.py b/vllm_ascend/eplb/eplb_updator.py index f2a5b695060..89721702de3 100644 --- a/vllm_ascend/eplb/eplb_updator.py +++ b/vllm_ascend/eplb/eplb_updator.py @@ -34,6 +34,7 @@ def __init__(self, ascend_config, loader, eplb_process: EplbProcess, self.eplb_loader = loader self.eplb_process = eplb_process self.shared_dict = self.eplb_process.shared_dict + self.moe_imbalance_dict: dict[int, float] = {} def set_adaptor(self, adaptor): self.adaptor = adaptor @@ -173,8 +174,48 @@ def compute_and_set_moe_load(self, is_clear=False): logger.debug( f"[ModelRunner] Updated shared_dict['moe_load'] shape={moe_load.shape}" ) + + if dist.is_initialized() and dist.get_rank() == 0: + self.compute_moe_imbalance(moe_load) + self.summarize_moe_imbalance() + return moe_load + def compute_moe_imbalance(self, moe_load: torch.Tensor): + + self.moe_imbalance_dict.clear() + + layer_card_load = moe_load.sum(dim=-1).cpu().float() + + for layer_idx in range(layer_card_load.size(0)): + layer_load = layer_card_load[layer_idx] + + mean_load = layer_load.mean().item() + max_load = layer_load.max().item() + + moe_load_imbalance = max_load / (mean_load + 1e-6) + + logger.debug(f"[ModelRunner][MOE_load_stats][Layer {layer_idx}] " + f"PAR={moe_load_imbalance:.4f}") + + self.moe_imbalance_dict[layer_idx] = moe_load_imbalance + + def summarize_moe_imbalance(self): + + values = list(self.moe_imbalance_dict.values()) + if not values: + logger.info("[MOE_load_stats] No data available.") + return + + avg_imbalance = sum(values) / len(values) + max_imbalance = max(values) + min_imbalance = min(values) + + logger.info( + f"[ModelRunner][MOE_load_stats] Peak-to-Average-Ratio: " + f"Mean={avg_imbalance:.4f}, Max={max_imbalance:.4f}, Min={min_imbalance:.4f}" + ) + def warm_up_eplb(self): self.get_init_expert_map()