From 8efa67f7b322aabaa64fbf533e667b8b9bbb9e79 Mon Sep 17 00:00:00 2001
From: arbi-dev <dmitri.evseev@arbi.city>
Date: Sun, 5 Apr 2026 15:35:51 +0100
Subject: [PATCH] Add GQA group_size 5, 6, 7 to DISPATCH_GQA_GROUP_SIZE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The macro only dispatches group sizes 1, 2, 3, 4, and 8; any other
value hits a runtime error ("Unsupported group_size"). This breaks
several popular models whose GQA group size is 6 or 7:

  - group_size 6: Qwen3.5-27B (24Q/4KV), InternLM2.5-20B (48Q/8KV)
  - group_size 7: Qwen2.5-7B (28Q/4KV), Yi-1.5-34B (56Q/8KV)

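Add explicit constexpr cases for 5, 6, and 7 so that every group size
from 1 through 8 is supported (5 has no motivating model above; it
fills the remaining gap in that range). Each new case adds one
template instantiation per call site.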

The error manifests as:
  RuntimeError: Unsupported group_size: 6
when calling BatchDecodeWithPagedKVCache or similar kernel dispatch
paths that go through DISPATCH_GQA_GROUP_SIZE.
---
 include/flashinfer/utils.cuh | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/include/flashinfer/utils.cuh b/include/flashinfer/utils.cuh
index c7edf5ab57..4718730126 100644
--- a/include/flashinfer/utils.cuh
+++ b/include/flashinfer/utils.cuh
@@ -147,6 +147,15 @@
   } else if (group_size == 4) {                              \
     constexpr size_t GROUP_SIZE = 4;                         \
     __VA_ARGS__                                              \
+  } else if (group_size == 5) {                              \
+    constexpr size_t GROUP_SIZE = 5;                         \
+    __VA_ARGS__                                              \
+  } else if (group_size == 6) {                              \
+    constexpr size_t GROUP_SIZE = 6;                         \
+    __VA_ARGS__                                              \
+  } else if (group_size == 7) {                              \
+    constexpr size_t GROUP_SIZE = 7;                         \
+    __VA_ARGS__                                              \
   } else if (group_size == 8) {                              \
     constexpr size_t GROUP_SIZE = 8;                         \
     __VA_ARGS__                                              \