Skip to content
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions vllm/v1/attention/backends/flex_attention.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
AttentionMetadataBuilder,
AttentionType,
CommonAttentionMetadata,
MultipleOf,
)
from vllm.v1.kv_cache_interface import AttentionSpec, EncoderOnlyAttentionSpec

Expand Down Expand Up @@ -133,6 +134,10 @@ def use_cascade_attention(*args, **kwargs) -> bool:
@classmethod
def get_supported_head_sizes(cls) -> list[int]:
    """Return the attention head sizes this class reports as supported.

    Returns:
        An empty list.

    NOTE(review): presumably callers treat an empty list as "no
    restriction on head size" -- confirm against the call sites.
    """
    return []

@staticmethod
def get_supported_kernel_block_sizes() -> list[int | MultipleOf]:
    """Return the kernel block sizes this class reports as supported.

    Returns:
        A one-element list containing ``MultipleOf(16)``.

    NOTE(review): presumably ``MultipleOf(16)`` means any block size that
    is a multiple of 16 is accepted -- confirm against the definition of
    ``MultipleOf``.
    """
    return [MultipleOf(16)]


# @torch.compile(fullgraph=True, mode="reduce-overhead")
Expand Down
Loading