diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index 9e66c282801f..f3488dc27576 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -2171,7 +2171,8 @@ def _handle_model_specific_adjustments(self): ) # TRTLLM AllReduce Fusion supports SM90/100, enable it by default - # for models with explicit support (DeepseekV3, GptOss, Glm4Moe, Qwen3Moe) + # for models with explicit support (DeepseekV3, GptOss, Glm4Moe, + # Qwen3/Qwen3Next/Qwen3.5 MoE families) # TODO: currently, it is only supported in the single node scenario. https://github.com/flashinfer-ai/flashinfer/issues/2006 # TODO: there is currently a bug on H20 device specifically, https://github.com/flashinfer-ai/flashinfer/issues/2204 device_name = get_device_name() @@ -2189,6 +2190,7 @@ def _handle_model_specific_adjustments(self): "Glm4MoeForCausalLM", "Glm4MoeLiteForCausalLM", "Qwen3MoeForCausalLM", + "Qwen3NextForCausalLM", "KimiK25ForConditionalGeneration", "Qwen3_5MoeForConditionalGeneration", "Qwen3_5ForConditionalGeneration",