From 8efa67f7b322aabaa64fbf533e667b8b9bbb9e79 Mon Sep 17 00:00:00 2001 From: arbi-dev Date: Sun, 5 Apr 2026 15:35:51 +0100 Subject: [PATCH] Add GQA group_size 5, 6, 7 to DISPATCH_GQA_GROUP_SIZE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The macro only dispatched group sizes 1, 2, 3, 4, 8 — any other value hit a runtime error ("Unsupported group_size"). This breaks several popular models with non-power-of-2 GQA ratios: - group_size 6: Qwen3.5-27B (24Q/4KV), InternLM2.5-20B (48Q/8KV) - group_size 7: Qwen2.5-7B (28Q/4KV), Yi-1.5-34B (56Q/8KV) Add explicit constexpr cases for 5, 6, and 7 so all group sizes 1-8 are supported. Each adds one template instantiation per call site. The error manifests as: RuntimeError: Unsupported group_size: 6 when calling BatchDecodeWithPagedKVCache or similar kernel dispatch paths that go through DISPATCH_GQA_GROUP_SIZE. --- include/flashinfer/utils.cuh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/flashinfer/utils.cuh b/include/flashinfer/utils.cuh index c7edf5ab57..4718730126 100644 --- a/include/flashinfer/utils.cuh +++ b/include/flashinfer/utils.cuh @@ -147,6 +147,15 @@ } else if (group_size == 4) { \ constexpr size_t GROUP_SIZE = 4; \ __VA_ARGS__ \ + } else if (group_size == 5) { \ + constexpr size_t GROUP_SIZE = 5; \ + __VA_ARGS__ \ + } else if (group_size == 6) { \ + constexpr size_t GROUP_SIZE = 6; \ + __VA_ARGS__ \ + } else if (group_size == 7) { \ + constexpr size_t GROUP_SIZE = 7; \ + __VA_ARGS__ \ } else if (group_size == 8) { \ constexpr size_t GROUP_SIZE = 8; \ __VA_ARGS__ \