diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index 0146ee4c144a..88e6660e2161 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -209,7 +209,9 @@ def enable_mla_dual_rms_norm_fusion(cfg: "VllmConfig") -> bool: "use_inductor_graph_partition": False, }, "kernel_config": { - "enable_flashinfer_autotune": True, + # Disabled for now due to correctness issues: + # https://github.com/flashinfer-ai/flashinfer/issues/3197 + "enable_flashinfer_autotune": False, }, } OPTIMIZATION_LEVEL_02 = { @@ -229,7 +231,9 @@ def enable_mla_dual_rms_norm_fusion(cfg: "VllmConfig") -> bool: "use_inductor_graph_partition": False, }, "kernel_config": { - "enable_flashinfer_autotune": True, + # Disabled for now due to correctness issues: + # https://github.com/flashinfer-ai/flashinfer/issues/3197 + "enable_flashinfer_autotune": False, }, } OPTIMIZATION_LEVEL_03 = {