diff --git a/vllm/config/attention.py b/vllm/config/attention.py index 014bb9b22601..1da647a6d6ff 100644 --- a/vllm/config/attention.py +++ b/vllm/config/attention.py @@ -30,7 +30,7 @@ class AttentionConfig: use_cudnn_prefill: bool = False """Whether to use cudnn prefill.""" - use_trtllm_ragged_deepseek_prefill: bool = True + use_trtllm_ragged_deepseek_prefill: bool = False """Whether to use TRTLLM ragged deepseek prefill.""" use_trtllm_attention: bool | None = None