Skip to content

Commit 25bb9e8

Browse files
authored
[CI Failure] fix models/language/pooling/test_auto_prefix_cache_support.py (#24636)
Signed-off-by: wang.yuqi <[email protected]>
1 parent a1213fa commit 25bb9e8

File tree

1 file changed

+4
-0
lines changed

1 file changed

+4
-0
lines changed

vllm/config/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3558,6 +3558,10 @@ def __post_init__(self):
35583558
disable_chunked_prefill_reasons.append(
35593559
"Only \"last\" pooling supports chunked "
35603560
"prefill and prefix caching; disabling both.")
3561+
if not getattr(self.model_config.hf_config, "is_causal", True):
3562+
disable_chunked_prefill_reasons.append(
3563+
"Only models using causal attention supports chunked "
3564+
"prefill and prefix caching; disabling both.")
35613565
elif self.model_config.is_encoder_decoder:
35623566
self.scheduler_config.max_num_encoder_input_tokens = \
35633567
MULTIMODAL_REGISTRY.get_encdec_max_encoder_len(self.model_config)

0 commit comments

Comments
 (0)