1 parent 707ec59 commit 0b82b35
tensorrt_llm/llmapi/llm_args.py
@@ -2014,8 +2014,13 @@ def validate_cuda_graph_config(self) -> 'TorchLlmArgs':
 
     @model_validator(mode='after')
     def sync_quant_config_with_kv_cache_config_dtype(self) -> 'TorchLlmArgs':
+        if self.kv_cache_config is None:
+            return self
+
         assert self.quant_config is not None
-        if self.kv_cache_config.dtype == 'fp8':
+        if self.kv_cache_config.dtype == "auto":
+            pass
+        elif self.kv_cache_config.dtype == 'fp8':
             self.quant_config.kv_cache_quant_algo = QuantAlgo.FP8
         else:
             logger.warning(
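
The patch makes the validator tolerate a missing kv_cache_config by returning early, and treats the "auto" dtype as an explicit no-op instead of falling through to the warning branch. Below is a minimal, self-contained sketch of the same pydantic after-validator pattern, assuming simplified stand-in classes: KvCacheConfig, QuantConfig, and QuantAlgo here are illustrative only and do not mirror the real TensorRT-LLM definitions.

    # Sketch of a pydantic v2 after-validator that syncs two config fields.
    # Stand-in types only; not the actual TensorRT-LLM implementation.
    import logging
    from enum import Enum
    from typing import Optional

    from pydantic import BaseModel, model_validator

    logger = logging.getLogger(__name__)


    class QuantAlgo(str, Enum):
        FP8 = "FP8"


    class KvCacheConfig(BaseModel):
        dtype: str = "auto"


    class QuantConfig(BaseModel):
        kv_cache_quant_algo: Optional[QuantAlgo] = None


    class TorchLlmArgs(BaseModel):
        kv_cache_config: Optional[KvCacheConfig] = None
        quant_config: QuantConfig = QuantConfig()

        @model_validator(mode='after')
        def sync_quant_config_with_kv_cache_config_dtype(self) -> 'TorchLlmArgs':
            # Mirror the patched logic: skip when no kv cache config is given.
            if self.kv_cache_config is None:
                return self

            assert self.quant_config is not None
            if self.kv_cache_config.dtype == "auto":
                pass  # "auto" is resolved elsewhere; nothing to sync here.
            elif self.kv_cache_config.dtype == 'fp8':
                self.quant_config.kv_cache_quant_algo = QuantAlgo.FP8
            else:
                logger.warning(
                    "Unrecognized kv_cache_config.dtype %s; quant config not synced.",
                    self.kv_cache_config.dtype)
            return self


    # Usage: an fp8 kv cache dtype propagates into the quant config.
    args = TorchLlmArgs(kv_cache_config=KvCacheConfig(dtype='fp8'))
    assert args.quant_config.kv_cache_quant_algo is QuantAlgo.FP8

An after-validator runs once the model is fully constructed, so it can read and mutate sibling fields; returning self is required by pydantic's model_validator contract in 'after' mode.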