[Refactor] Update accumulation handling in gemm_sm90.h (#603)

LeiWang1999 · web-flow · commit 29cae4e503c7 · 2025-06-27T13:46:04.000+08:00
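- Replaced `tiled_mma.accumulate_ = GMMA::ScaleOut::Zero` with a call to `clear(acc)` inside the `if constexpr (clear_accum)` branches, so the accumulator is cleared explicitly instead of via the MMA's scale-out flag; this improves the clarity and maintainability of the accumulation logic.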
- This change is applied to all three `if constexpr (clear_accum)` branches of `GemmTensorOp` touched in this commit, so the accumulator is cleared the same way in each of them, which improves readability.
diff --git a/src/tl_templates/cuda/gemm_sm90.h b/src/tl_templates/cuda/gemm_sm90.h
@@ -415,7 +415,7 @@ class GemmTensorOp {
     auto tCrA_view = make_tensor(tCrA.data(), remove_swizzle(tCrA.layout()));
     auto tCrB_view = make_tensor(tCrB.data(), remove_swizzle(tCrB.layout()));
     if constexpr (clear_accum) {
-      tiled_mma.accumulate_ = GMMA::ScaleOut::Zero;
+      clear(acc);
     }
     CUTE_UNROLL
     for (int k = 0; k < size<2>(tCrA); ++k) {
@@ -448,7 +448,7 @@ class GemmTensorOp {
                     partition_shape_A(tiled_mma, Shape<Int<M>, Int<K>>{}));
     auto tCrB_view = make_tensor(tCrB.data(), remove_swizzle(tCrB.layout()));
     if constexpr (clear_accum) {
-      tiled_mma.accumulate_ = GMMA::ScaleOut::Zero;
+      clear(acc);
     }
     copy(tiled_copy_B, tCsB(_, _, 0), tCrB_copy_view(_, _, 0));
     CUTE_UNROLL
@@ -483,7 +483,7 @@ class GemmTensorOp {
                     partition_shape_B(tiled_mma, Shape<Int<N>, Int<K>>{}));
     auto tCrA_view = make_tensor(tCrA.data(), remove_swizzle(tCrA.layout()));
     if constexpr (clear_accum) {
-      tiled_mma.accumulate_ = GMMA::ScaleOut::Zero;
+      clear(acc);
     }
     copy(tiled_copy_A, tCsA(_, _, 0), tCrA_copy_view(_, _, 0));
     CUTE_UNROLL