diff --git a/tensorrt_llm/llmapi/llm_args.py b/tensorrt_llm/llmapi/llm_args.py index 337127fa4ae..5c758134d1e 100644 --- a/tensorrt_llm/llmapi/llm_args.py +++ b/tensorrt_llm/llmapi/llm_args.py @@ -2708,7 +2708,7 @@ class TorchLlmArgs(BaseLlmArgs): _quant_config: Optional[QuantConfig] = PrivateAttr(default=None) disable_flashinfer_sampling: bool = Field( - default=True, + default=False, description= "Disable the use of FlashInfer.sampling. This option is likely to be removed in the future.", status="prototype", diff --git a/tests/unittest/api_stability/references/llm.yaml b/tests/unittest/api_stability/references/llm.yaml index 49e2169f479..18308ab290e 100644 --- a/tests/unittest/api_stability/references/llm.yaml +++ b/tests/unittest/api_stability/references/llm.yaml @@ -109,7 +109,7 @@ methods: status: beta disable_flashinfer_sampling: annotation: bool - default: True + default: False status: prototype moe_config: annotation: tensorrt_llm.llmapi.llm_args.MoeConfig