vllm-project · vllm-bot · May 1, 2026 · Apr 30, 2026 · Apr 30, 2026 · Apr 30, 2026
@@ -35,6 +35,7 @@
     AttentionMetadataBuilder,
     AttentionType,
     CommonAttentionMetadata,
+    MultipleOf,
 )
 from vllm.v1.kv_cache_interface import AttentionSpec, EncoderOnlyAttentionSpec
 
@@ -133,6 +134,10 @@ def use_cascade_attention(*args, **kwargs) -> bool:
     @classmethod
     def get_supported_head_sizes(cls) -> list[int]:  # lists no specific head sizes
         return []  # NOTE(review): empty presumably means "no restriction" - confirm with caller
+
+    @staticmethod
+    def get_supported_kernel_block_sizes() -> list[int | MultipleOf]:  # takes no arguments
+        return [MultipleOf(16)]  # sole entry; presumably "any multiple of 16" - TODO confirm
 
 
 # @torch.compile(fullgraph=True, mode="reduce-overhead")