From 5f756c6c0fff17c737cf9780b4b09dc20d8c07b0 Mon Sep 17 00:00:00 2001 From: lalala-sh Date: Mon, 9 Mar 2026 03:41:03 +0000 Subject: [PATCH 1/3] fix memset size --- .../device/impl/device_moe_gemm_blockscale.hpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/projects/composablekernel/include/ck/tensor_operation/gpu/device/impl/device_moe_gemm_blockscale.hpp b/projects/composablekernel/include/ck/tensor_operation/gpu/device/impl/device_moe_gemm_blockscale.hpp index 12d28f572c5d..7254cccc5953 100644 --- a/projects/composablekernel/include/ck/tensor_operation/gpu/device/impl/device_moe_gemm_blockscale.hpp +++ b/projects/composablekernel/include/ck/tensor_operation/gpu/device/impl/device_moe_gemm_blockscale.hpp @@ -256,7 +256,8 @@ struct DeviceMoeGemmBlockScale if(arg_.KBatch > 1) hipGetErrorString(hipMemsetAsync(arg_.p_c_grid, 0, - arg_.M * arg_.N * sizeof(CDataType) * + arg_.NumTokens * arg_.TopK * arg_.N * + sizeof(CDataType) * (IsInputGemm && IsSplitK ? 2 : 1), stream_config.stream_id_)); }; @@ -272,13 +273,13 @@ struct DeviceMoeGemmBlockScale } else { - if(arg.KBatch > 1) - hipGetErrorString(hipMemsetAsync(arg.p_c_grid, - 0, - arg.M * arg.N * sizeof(CDataType) * - (IsInputGemm && IsSplitK ? 2 : 1), - stream_config.stream_id_)); - + if(arg_.KBatch > 1) + hipGetErrorString( + hipMemsetAsync(arg_.p_c_grid, + 0, + arg_.NumTokens * arg_.TopK * arg_.N * sizeof(CDataType) * + (IsInputGemm && IsSplitK ? 2 : 1), + stream_config.stream_id_)); ave_time = launch_and_time_kernel( stream_config, kernel, dim3(gdx, gdy, gdz), dim3(BlockSize), 0, arg); } From 7b27d09422afbf4b92c2eba826f25a6d02619d9a Mon Sep 17 00:00:00 2001 From: lalala-sh Date: Wed, 11 Mar 2026 13:50:59 +0800 Subject: [PATCH 2/3] fix typo --- .../gpu/device/impl/device_moe_gemm_blockscale.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/composablekernel/include/ck/tensor_operation/gpu/device/impl/device_moe_gemm_blockscale.hpp b/projects/composablekernel/include/ck/tensor_operation/gpu/device/impl/device_moe_gemm_blockscale.hpp index 7254cccc5953..d8a181e32f40 100644 --- a/projects/composablekernel/include/ck/tensor_operation/gpu/device/impl/device_moe_gemm_blockscale.hpp +++ b/projects/composablekernel/include/ck/tensor_operation/gpu/device/impl/device_moe_gemm_blockscale.hpp @@ -277,7 +277,7 @@ struct DeviceMoeGemmBlockScale hipGetErrorString( hipMemsetAsync(arg_.p_c_grid, 0, - arg_.NumTokens * arg_.TopK * arg_.N * sizeof(CDataType) * + arg.NumTokens * arg.TopK * arg.N * sizeof(CDataType) * (IsInputGemm && IsSplitK ? 2 : 1), stream_config.stream_id_)); ave_time = launch_and_time_kernel( From f06cce85676b47b119a1b03b2282be2f6c947e55 Mon Sep 17 00:00:00 2001 From: lalala-sh Date: Wed, 11 Mar 2026 14:00:58 +0800 Subject: [PATCH 3/3] fix typo --- .../gpu/device/impl/device_moe_gemm_blockscale.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/composablekernel/include/ck/tensor_operation/gpu/device/impl/device_moe_gemm_blockscale.hpp b/projects/composablekernel/include/ck/tensor_operation/gpu/device/impl/device_moe_gemm_blockscale.hpp index d8a181e32f40..684219b58432 100644 --- a/projects/composablekernel/include/ck/tensor_operation/gpu/device/impl/device_moe_gemm_blockscale.hpp +++ b/projects/composablekernel/include/ck/tensor_operation/gpu/device/impl/device_moe_gemm_blockscale.hpp @@ -273,9 +273,9 @@ struct DeviceMoeGemmBlockScale } else { - if(arg_.KBatch > 1) + if(arg.KBatch > 1) hipGetErrorString( - hipMemsetAsync(arg_.p_c_grid, + hipMemsetAsync(arg.p_c_grid, 0, arg.NumTokens * arg.TopK * arg.N * sizeof(CDataType) * (IsInputGemm && IsSplitK ? 2 : 1),