[Accuracy diff] Fix accuracy diff for conv2d_transpose API with NHWC format

xiaohajiayou · xiaohajiayou · commit 49c8e68b63ac · 2025-09-08T21:46:13.000+08:00
Fix a gradient calculation error in conv2d_transpose when using the NHWC format
with padding > 0. The issue was in im2col_cfo_cpu.h, where an incorrect index
calculation caused gradients to be shifted to the wrong positions.

Key changes:
- Replace the incorrect ternary-operator row index with a direct row/column
  calculation and bounds checking (out-of-range positions are zero-filled)
  in the NHWC branches
- Add TestWithSAMEPad_NHWC and TestWithSAMEPadGroups_NHWC test cases
- Ensure gradients match PyTorch reference implementation
- Fix code formatting to meet clang-format requirements
diff --git a/paddle/phi/kernels/funcs/im2col_cfo_cpu.h b/paddle/phi/kernels/funcs/im2col_cfo_cpu.h
@@ -210,11 +210,15 @@ inline void im2col_sh1sw1dh1dw1ph1pw1(const phi::DenseTensor& im,
             std::memcpy(dst_data + plw, src_data, copy_size);
           } else {
             for (int kow = 0; kow < output_width - plw - prw; ++kow) {
-              dst_data[plw + kow] =
-                  im_data[(((oh - plh > 0 ? oh - plh : 0) + kh) * im_width +
-                           kow) *
-                              im_channels +
-                          ic];
+              int im_row = oh - plh + kh;
+              int im_col = kow;
+              if (im_row >= 0 && im_row < im_height && im_col >= 0 &&
+                  im_col < im_width) {
+                dst_data[plw + kow] =
+                    im_data[(im_row * im_width + im_col) * im_channels + ic];
+              } else {
+                dst_data[plw + kow] = static_cast<T>(0);
+              }
             }
           }
           dst_data = dst_data + col_matrix_width;
@@ -269,11 +273,15 @@ inline void im2col_sh1sw1dh1dw1ph1pw1(const phi::DenseTensor& im,
                         sizeof(T) * (output_width - (plw - kw)));
           } else {
             for (int kow = 0; kow < output_width - (plw - kw); ++kow) {
-              dst_data[plw - kw + kow] =
-                  im_data[(((oh - plh > 0 ? oh - plh : 0) + kh) * im_width +
-                           kow) *
-                              im_channels +
-                          ic];
+              int im_row = oh - plh + kh;
+              int im_col = kow;
+              if (im_row >= 0 && im_row < im_height && im_col >= 0 &&
+                  im_col < im_width) {
+                dst_data[plw - kw + kow] =
+                    im_data[(im_row * im_width + im_col) * im_channels + ic];
+              } else {
+                dst_data[plw - kw + kow] = static_cast<T>(0);
+              }
             }
           }
           dst_data = dst_data + col_matrix_width;
@@ -284,11 +292,15 @@ inline void im2col_sh1sw1dh1dw1ph1pw1(const phi::DenseTensor& im,
                 dst_data, src_data + (kw - plw), sizeof(T) * output_width);
           } else {
             for (int kow = 0; kow < output_width; ++kow) {
-              dst_data[kow] =
-                  im_data[(((oh - plh > 0 ? oh - plh : 0) + kh) * im_width +
-                           kw - plw + kow) *
-                              im_channels +
-                          ic];
+              int im_row = oh - plh + kh;
+              int im_col = kw - plw + kow;
+              if (im_row >= 0 && im_row < im_height && im_col >= 0 &&
+                  im_col < im_width) {
+                dst_data[kow] =
+                    im_data[(im_row * im_width + im_col) * im_channels + ic];
+              } else {
+                dst_data[kow] = static_cast<T>(0);
+              }
             }
           }
           dst_data = dst_data + col_matrix_width;
@@ -301,11 +313,15 @@ inline void im2col_sh1sw1dh1dw1ph1pw1(const phi::DenseTensor& im,
                         sizeof(T) * (output_width - i));
           } else {
             for (int kow = 0; kow < output_width - i; ++kow) {
-              dst_data[kow] =
-                  im_data[(((oh - plh > 0 ? oh - plh : 0) + kh) * im_width +
-                           kw - plw + kow) *
-                              im_channels +
-                          ic];
+              int im_row = oh - plh + kh;
+              int im_col = kw - plw + kow;
+              if (im_row >= 0 && im_row < im_height && im_col >= 0 &&
+                  im_col < im_width) {
+                dst_data[kow] =
+                    im_data[(im_row * im_width + im_col) * im_channels + ic];
+              } else {
+                dst_data[kow] = static_cast<T>(0);
+              }
             }
           }
           dst_data = dst_data + col_matrix_width;
diff --git a/test/legacy_test/test_conv2d_transpose_op.py b/test/legacy_test/test_conv2d_transpose_op.py
@@ -1575,5 +1575,29 @@ def init_data(self):
         self.np_out = np.zeros([4, 0, 6, 6])
 
 
+class TestWithSAMEPad_NHWC(TestConv2DTransposeOp):
+    def init_test_case(self):
+        self.stride = [1, 1]
+        self.dilations = [1, 1]
+        self.groups = 1
+        self.input_size = [1, 3, 3, 1]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 2, 3, 3]
+        self.data_format = 'NHWC'
+        self.padding_algorithm = 'SAME'
+
+
+class TestWithSAMEPadGroups_NHWC(TestConv2DTransposeOp):
+    def init_test_case(self):
+        self.stride = [1, 1]
+        self.dilations = [1, 1]
+        self.groups = 2
+        self.input_size = [1, 3, 3, 2]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 1, 3, 3]
+        self.data_format = 'NHWC'
+        self.padding_algorithm = 'SAME'
+
+
 if __name__ == '__main__':
     unittest.main()