diff --git a/vllm_ascend/attention/context_parallel/mla_cp.py b/vllm_ascend/attention/context_parallel/mla_cp.py index 539f2a330e6..87413f5b692 100644 --- a/vllm_ascend/attention/context_parallel/mla_cp.py +++ b/vllm_ascend/attention/context_parallel/mla_cp.py @@ -74,7 +74,7 @@ def build( fast_build: bool = False, ) -> AscendMLAMetadata: metadata_cls = super().build(common_prefix_len, common_attn_metadata) - if self.num_prefills == 0 and self.pcp_size > 1: + if self.pcp_size > 1: self.slot_mapping[:self. num_decode_tokens] = self.slot_mapping[:self. num_decode_tokens