diff --git a/python/sglang/srt/layers/moe/token_dispatcher/deepep.py b/python/sglang/srt/layers/moe/token_dispatcher/deepep.py index 24dd42b41c3d..7c87044391de 100644 --- a/python/sglang/srt/layers/moe/token_dispatcher/deepep.py +++ b/python/sglang/srt/layers/moe/token_dispatcher/deepep.py @@ -377,6 +377,7 @@ def dispatch_a( if ( deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM and not get_moe_runner_backend().is_cutlass() + and not get_bool_env_var("SGLANG_DEEPEP_BF16_DISPATCH") ): # TODO hard code 128 block quant,use fp8 communication hidden_states = sglang_per_token_group_quant_fp8(