diff --git a/vllm/v1/attention/backends/mla/indexer.py b/vllm/v1/attention/backends/mla/indexer.py
index ded321834607..28d7e11b7801 100644
--- a/vllm/v1/attention/backends/mla/indexer.py
+++ b/vllm/v1/attention/backends/mla/indexer.py
@@ -164,6 +164,10 @@ def get_name() -> str:
     def get_supported_kernel_block_sizes() -> list[int | MultipleOf]:
         return [256]
 
+    @staticmethod
+    def get_builder_cls() -> type["DeepseekV32IndexerMetadataBuilder"]:
+        return DeepseekV4IndexerMetadataBuilder  # V32 builder subclass, defined below
+
 
 @dataclass
 class DeepseekV32IndexerPrefillChunkMetadata:
@@ -638,6 +642,10 @@ def build(
         return attn_metadata
 
 
+class DeepseekV4IndexerMetadataBuilder(DeepseekV32IndexerMetadataBuilder):
+    natively_supported_next_n_fp4: list[int] = [1]  # class-level list shared by instances
+
+
 def build_prefill_chunk_metadata(
     start_idx: int,
     end_idx: int,