diff --git a/paddle/phi/kernels/fusion/cutlass/cutlass_extensions/gemm/threadblock/dq_mma_base.h b/paddle/phi/kernels/fusion/cutlass/cutlass_extensions/gemm/threadblock/dq_mma_base.h index 53fffb6c80c3d1..5ca3cbcf17740c 100644 --- a/paddle/phi/kernels/fusion/cutlass/cutlass_extensions/gemm/threadblock/dq_mma_base.h +++ b/paddle/phi/kernels/fusion/cutlass/cutlass_extensions/gemm/threadblock/dq_mma_base.h @@ -169,7 +169,8 @@ class DqMmaBase { AlignedBuffer operand_B; /// Buffer to hold scales for threadblock - AlignedBuffer operand_scale; + /// Note(zkk): allocate extra space, which is used for the group-wise weight-only case. + AlignedBuffer operand_scale; public: //