diff --git a/scripts/performance/configs/qwen/qwen3_llm_pretrain.py b/scripts/performance/configs/qwen/qwen3_llm_pretrain.py index 2eecd74d59..c7a295dbc3 100644 --- a/scripts/performance/configs/qwen/qwen3_llm_pretrain.py +++ b/scripts/performance/configs/qwen/qwen3_llm_pretrain.py @@ -433,6 +433,7 @@ def qwen3_next_80b_a3b_pretrain_config_h100( cfg = qwen3_next_80b_a3b_pretrain_config() cfg.mixed_precision = precision_config cfg.comm_overlap = CommOverlapConfig(tp_comm_overlap=True) + cfg.model.moe_token_dispatcher_type = "alltoall" set_qwen3_next_common_configs(cfg) set_workload_base_configs(cfg, base_cfg)