From aeed7acf024a599e673a9e694fa102694c14e35c Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Fri, 13 Feb 2026 13:32:59 -0500 Subject: [PATCH] =?UTF-8?q?Revert=20"[Bugfix]=20Fix=20fused=20MoE=20IMA=20?= =?UTF-8?q?(sans=20chunking)=20by=20using=20int64=20for=20strides=E2=80=A6?= =?UTF-8?q?"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit d7982daff5334b9465b29fa943a1954c064ab226. --- .../layers/fused_moe/fused_moe.py | 54 +++++++++---------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py index f988e91c2478..5240f79be5cd 100644 --- a/vllm/model_executor/layers/fused_moe/fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_moe.py @@ -98,19 +98,19 @@ def fused_moe_kernel_gptq_awq( # moving by 1 element in a particular dimension. E.g. `stride_am` is # how much to increase `a_ptr` by to get the element one row down # (A has M rows). - stride_am: tl.int64, - stride_ak: tl.int64, - stride_be: tl.int64, - stride_bk: tl.int64, - stride_bn: tl.int64, - stride_cm: tl.int64, - stride_cn: tl.int64, - stride_bse: tl.int64, - stride_bsk: tl.int64, - stride_bsn: tl.int64, - stride_bze: tl.int64, - stride_bzk: tl.int64, - stride_bzn: tl.int64, + stride_am, + stride_ak, + stride_be, + stride_bk, + stride_bn, + stride_cm, + stride_cn, + stride_bse, + stride_bsk, + stride_bsn, + stride_bze, + stride_bzk, + stride_bzn, block_k_diviable: tl.constexpr, group_size: tl.constexpr, # Meta-parameters @@ -332,20 +332,20 @@ def fused_moe_kernel( # moving by 1 element in a particular dimension. E.g. `stride_am` is # how much to increase `a_ptr` by to get the element one row down # (A has M rows). - stride_am: tl.int64, - stride_ak: tl.int64, - stride_be: tl.int64, - stride_bk: tl.int64, - stride_bn: tl.int64, - stride_cm: tl.int64, - stride_cn: tl.int64, - stride_asm: tl.int64, - stride_ask: tl.int64, - stride_bse: tl.int64, - stride_bsk: tl.int64, - stride_bsn: tl.int64, - stride_bbe: tl.int64, # bias expert stride - stride_bbn: tl.int64, # bias N stride + stride_am, + stride_ak, + stride_be, + stride_bk, + stride_bn, + stride_cm, + stride_cn, + stride_asm, + stride_ask, + stride_bse, + stride_bsk, + stride_bsn, + stride_bbe, # bias expert stride + stride_bbn, # bias N stride # Block size for block-wise quantization group_n: tl.constexpr, group_k: tl.constexpr,