diff --git a/.github/workflows/metax_work.yaml b/.github/workflows/metax_work.yaml
index fd7d04c0843..353cbb098b6 100644
--- a/.github/workflows/metax_work.yaml
+++ b/.github/workflows/metax_work.yaml
@@ -5,12 +5,6 @@ on:
   pull_request:
     types: [opened, synchronize]
     branches: [develop, release/**]
-    paths:
-      - "**"
-      - "Paddle/**"
-      - "!backends/**"
-      - "backends/metax_gpu/**"
-
 permissions: read-all
 
 defaults:
@@ -40,16 +34,48 @@ jobs:
           git fetch origin pull/${{ github.event.pull_request.number }}/head:pull/${{ github.event.pull_request.number }}/head
           git checkout pull/${{ github.event.pull_request.number }}/head
-          git submodule update --init --recursive
+
+
+
+          paddle_branch=${{ github.base_ref || github.ref_name}}
+          echo $paddle_branch
+          # sleep 10000
+          change_numbers=$(git diff --name-only remotes/origin/${paddle_branch} | wc -l)
+          echo $change_numbers
+
+
+          change_backend=$(git diff --name-only remotes/origin/${paddle_branch} | grep -c "backends/" || true)
+          echo $change_backend
+          change_metax_only=$(git diff --name-only remotes/origin/${paddle_branch} | grep -c "backends/metax_gpu" || true)
+          echo $change_metax_only
+
+          # change_backend=$(git diff --name-only remotes/origin/${paddle_branch} | grep "backends/"| wc -l)
+          # echo $change_backend
+          # change_metax_only=$(git diff --name-only remotes/origin/${paddle_branch} | grep "backends/metax_gpu"| wc -l)
+          # echo $change_metax_only
+
+          git diff --name-only remotes/origin/${paddle_branch}
+
+          if [ $change_numbers -ne $change_backend ]; then
+            echo "Common file changed, continue to run metax FULL CI test ..."
+          elif [ $change_metax_only -eq 0 ]; then
+            echo "No metax backend changes found, skip metax FULL CI ..."
+            exit 0
+          fi
+
+
+          # git submodule update --init --recursive
           fi
       - name: compile
         run: |
+          # sleep 10000
           cd backends/metax_gpu
           bash build.sh
       - name: run test
+        run: |
           cd backends/metax_gpu/tests
           bash run_test.sh -j 16
diff --git a/Paddle b/Paddle
index cc367e8767d..945ea696575 160000
--- a/Paddle
+++ b/Paddle
@@ -1 +1 @@
-Subproject commit cc367e8767d49819b5100f22e279cd62a1587670
+Subproject commit 945ea69657591c6f702cbb9ccee0d9eefe9bf5f7
diff --git a/backends/metax_gpu/CMakeLists.txt b/backends/metax_gpu/CMakeLists.txt
index e357a5e5912..3e92996f9a2 100755
--- a/backends/metax_gpu/CMakeLists.txt
+++ b/backends/metax_gpu/CMakeLists.txt
@@ -703,9 +703,9 @@ file(
 set(CUSTOM_DEVICE_SRCS ${CUDA_SRCS} ${CC_SRCS} ${ERNIE_CORE_SRCS})
 set_source_files_properties(${CUSTOM_DEVICE_SRCS} PROPERTIES LANGUAGE CUDA)
-
+set(MACA_PATH $ENV{MACA_PATH})
 set(CMAKE_CUCC_COMPILER "cucc")
-set(CMAKE_CUCC_FLAGS "-I /opt/maca/tools/cu-bridge/include/")
+set(CMAKE_CUCC_FLAGS "-I ${MACA_PATH}/tools/cu-bridge/include/")
 
 add_library(${TARGET_NAME} SHARED ${CUSTOM_DEVICE_SRCS})
@@ -734,9 +734,9 @@ target_link_libraries(
   ${WARPRNNT_LIBRARIES}
   ${PADDLE_CORE_LIB})
 
-target_link_libraries(${TARGET_NAME} /opt/maca/lib/libmccl.so)
-target_link_libraries(${TARGET_NAME} /opt/maca/lib/libmcFlashAttn.so)
-target_link_libraries(${TARGET_NAME} /opt/maca/lib/libmcpti.so)
+target_link_libraries(${TARGET_NAME} ${MACA_PATH}/lib/libmccl.so)
+target_link_libraries(${TARGET_NAME} ${MACA_PATH}/lib/libmcFlashAttn.so)
+target_link_libraries(${TARGET_NAME} ${MACA_PATH}/lib/libmcpti.so)
 
 include_directories(BEFORE ${PADDLE_SOURCE_DIR})
diff --git a/backends/metax_gpu/cmake/dgc.cmake b/backends/metax_gpu/cmake/dgc.cmake
index 4c54e636d5e..4c61f2e6bcb 100644
--- a/backends/metax_gpu/cmake/dgc.cmake
+++ b/backends/metax_gpu/cmake/dgc.cmake
@@ -62,8 +62,8 @@ if(EXISTS
${DGC_DOWNLOAD_DIR}/${DGC_CACHE_FILENAME}) else() download_dgc() endif() - -set(CU_BRIDGE_PATH "/opt/maca/tools/cu-bridge") +set(MACA_PATH $ENV{MACA_PATH}) +set(CU_BRIDGE_PATH "${MACA_PATH}/tools/cu-bridge") add_custom_command( OUTPUT "${CU_BRIDGE_PATH}/bin/nvcc" diff --git a/backends/metax_gpu/kernels/cuda_kernels/activation_grad_kernel_register.cu b/backends/metax_gpu/kernels/cuda_kernels/activation_grad_kernel_register.cu index 6cdfb2f5242..6c46ef10c0f 100644 --- a/backends/metax_gpu/kernels/cuda_kernels/activation_grad_kernel_register.cu +++ b/backends/metax_gpu/kernels/cuda_kernels/activation_grad_kernel_register.cu @@ -119,7 +119,22 @@ void ActivationGradGPUImpl(const Context& dev_ctx, ActivationGradGPUImpl>( \ dev_ctx, &x, nullptr, &dout, dx, functor); \ } - +#define DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_DOUBLE_ATTRS_DEPX( \ + name, functor_class, attr1, attr2) \ + template \ + void name##GradKernel(const Context& dev_ctx, \ + const DenseTensor& x, \ + const DenseTensor& dout, \ + double attr1, \ + double attr2, \ + DenseTensor* dx) { \ + funcs::functor_class functor; \ + auto attrs = functor.GetAttrs(); \ + *(attrs[0].second) = attr1; \ + *(attrs[1].second) = attr2; \ + ActivationGradGPUImpl>( \ + dev_ctx, &x, nullptr, &dout, dx, functor); \ + } #define DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(name, functor_class) \ template \ void name##GradKernel(const Context& dev_ctx, \ @@ -239,10 +254,10 @@ DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(STanh, scale_a, scale_b); -DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(Softplus, - CudaSoftplusGradFunctor, - beta, - threshold); +DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_DOUBLE_ATTRS_DEPX(Softplus, + CudaSoftplusGradFunctor, + beta, + threshold); DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPOUT(HardSigmoid, CudaHardSigmoidGradFunctor, slope, diff --git a/backends/metax_gpu/kernels/cuda_kernels/activation_kernel_register.cu b/backends/metax_gpu/kernels/cuda_kernels/activation_kernel_register.cu index f24f3e8abbc..363932cfc28 100644 --- a/backends/metax_gpu/kernels/cuda_kernels/activation_kernel_register.cu +++ b/backends/metax_gpu/kernels/cuda_kernels/activation_kernel_register.cu @@ -90,7 +90,21 @@ void ActivationGPUImpl(const Context& dev_ctx, ActivationGPUImpl>( \ dev_ctx, x, out, functor); \ } - +#define DEFINE_GPU_ACT_KERNEL_WITH_TWO_DOUBLE_ATTRS( \ + name, functor_class, attr1, attr2) \ + template \ + void name##Kernel(const Context& dev_ctx, \ + const DenseTensor& x, \ + double attr1, \ + double attr2, \ + DenseTensor* out) { \ + funcs::functor_class functor; \ + auto attrs = functor.GetAttrs(); \ + *(attrs[0].second) = attr1; \ + *(attrs[1].second) = attr2; \ + ActivationGPUImpl>( \ + dev_ctx, x, out, functor); \ + } DEFINE_GPU_ACTIVATION_KERNEL(Cos, CudaCosFunctor) DEFINE_GPU_ACTIVATION_KERNEL(Tan, CudaTanFunctor) DEFINE_GPU_ACTIVATION_KERNEL(Acos, CudaAcosFunctor) @@ -139,10 +153,10 @@ DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(HardTanh, t_min, t_max) DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(Stanh, CudaSTanhFunctor, scale_a, scale_b) -DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(Softplus, - CudaSoftplusFunctor, - beta, - threshold) +DEFINE_GPU_ACT_KERNEL_WITH_TWO_DOUBLE_ATTRS(Softplus, + CudaSoftplusFunctor, + beta, + threshold) DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(HardSigmoid, CudaHardSigmoidFunctor, slope, diff --git a/backends/metax_gpu/kernels/metax_kernel/weight_quantize_kernel_register.cu b/backends/metax_gpu/kernels/metax_kernel/weight_quantize_kernel_register.cu index 4e2a4ce240c..44ac7f2fddc 100644 --- 
a/backends/metax_gpu/kernels/metax_kernel/weight_quantize_kernel_register.cu +++ b/backends/metax_gpu/kernels/metax_kernel/weight_quantize_kernel_register.cu @@ -115,11 +115,12 @@ void WeightQuantizeKernel(const Context& dev_ctx, dev_ctx.template Alloc(scale); weight_quant_gpu(dev_ctx, x.data(), - quanted_x.data(), + out->data(), scale->data(), weight_shape, arch, algo); + out->Resize({m, n}); #ifdef PADDLE_WITH_HIP DenseTensor x_int_tmp(out->type()); x_int_tmp.Resize({m, n / 2}); @@ -133,12 +134,12 @@ void WeightQuantizeKernel(const Context& dev_ctx, funcs::Transpose trans; trans(dev_ctx, x_int_tmp, out, axis); #else - weight_permute_gpu(dev_ctx, - quanted_x.data(), - out->data(), - weight_shape, - arch, - algo); + // weight_permute_gpu(dev_ctx, + // quanted_x.data(), + // out->data(), + // weight_shape, + // arch, + // algo); #endif } else if (algo == "w4a8") { weight_permute_gpu_w4a8(dev_ctx, diff --git a/backends/metax_gpu/tests/CMakeLists.txt b/backends/metax_gpu/tests/CMakeLists.txt index 0c84ada4b65..084b5b8c601 100755 --- a/backends/metax_gpu/tests/CMakeLists.txt +++ b/backends/metax_gpu/tests/CMakeLists.txt @@ -49,7 +49,13 @@ foreach(test_name ${TEST_PROGRAMS}) continue() endif() - set(CURRENT_TEST_PROGRAM ${PADDLE_LEGACY_TEST_PATH}/${test_name}.py) + string(FIND "${test_name}" "metax" METAX_SUFFIX_POS) + if(NOT METAX_SUFFIX_POS EQUAL -1) + set(CURRENT_TEST_PROGRAM ${METAX_UNIT_TEST_PATH}/${test_name}.py) + else() + set(CURRENT_TEST_PROGRAM ${PADDLE_LEGACY_TEST_PATH}/${test_name}.py) + endif() + if(NOT EXISTS ${CURRENT_TEST_PROGRAM}) message(WARNING "${CURRENT_TEST_PROGRAM} is not exist, skip it.") else() diff --git a/backends/metax_gpu/tests/unit_test/test_conv3d_layer_metax.py b/backends/metax_gpu/tests/unit_test/test_conv3d_layer_metax.py new file mode 100644 index 00000000000..cd4cd290065 --- /dev/null +++ b/backends/metax_gpu/tests/unit_test/test_conv3d_layer_metax.py @@ -0,0 +1,381 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
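+
+# NOTE: this file appears to be a metax_gpu adaptation of Paddle's legacy
+# test_conv3d_layer suite. The core.set_cudnn_switch(False) call below is
+# presumably there to keep Conv3D off the cuDNN path on this backend; the
+# cases then check that the static-graph layer, the functional F.conv3d API,
+# and the dygraph nn.Conv3D layer agree numerically on CPU and on the custom
+# device place.
+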
+import unittest + +import numpy as np +from op_test import get_device_place, is_custom_device +from test_conv3d_op import conv3d_forward_naive + +import paddle +import paddle.base.dygraph as dg +import paddle.nn.functional as F +from paddle import base, nn +from paddle.base import core + +core.set_cudnn_switch(False) + + +class Conv3DTestCase(unittest.TestCase): + def __init__( + self, + methodName="runTest", + batch_size=4, + spatial_shape=(8, 8, 8), + num_channels=6, + num_filters=8, + filter_size=3, + padding=0, + stride=1, + dilation=1, + groups=1, + no_bias=False, + data_format="NCDHW", + dtype="float32", + ): + super().__init__(methodName) + self.batch_size = batch_size + self.num_channels = num_channels + self.num_filters = num_filters + self.spatial_shape = spatial_shape + self.filter_size = filter_size + + self.padding = padding + self.stride = stride + self.dilation = dilation + self.groups = groups + self.no_bias = no_bias + self.data_format = data_format + self.dtype = dtype + + def setUp(self): + self.channel_last = self.data_format == "NDHWC" + if self.channel_last: + input_shape = ( + self.batch_size, + *self.spatial_shape, + self.num_channels, + ) + else: + input_shape = ( + self.batch_size, + self.num_channels, + *self.spatial_shape, + ) + self.input = np.random.randn(*input_shape).astype(self.dtype) + + if isinstance(self.filter_size, int): + filter_size = [self.filter_size] * 3 + else: + filter_size = self.filter_size + self.weight_shape = weight_shape = ( + self.num_filters, + self.num_channels // self.groups, + *filter_size, + ) + self.weight = np.random.uniform(-1, 1, size=weight_shape).astype(self.dtype) + if not self.no_bias: + self.bias = np.random.uniform(-1, 1, size=(self.num_filters,)).astype( + self.dtype + ) + else: + self.bias = None + + def base_layer(self, place): + main = base.Program() + start = base.Program() + with ( + base.unique_name.guard(), + base.program_guard(main, start), + ): + input_shape = ( + (-1, -1, -1, -1, self.num_channels) + if self.channel_last + else (-1, self.num_channels, -1, -1, -1) + ) + x_var = paddle.static.data("input", input_shape, dtype=self.dtype) + weight_attr = paddle.nn.initializer.Assign(self.weight) + if self.bias is None: + bias_attr = False + else: + bias_attr = paddle.nn.initializer.Assign(self.bias) + y_var = paddle.nn.Conv3D( + in_channels=self.num_channels, + out_channels=self.num_filters, + kernel_size=self.filter_size, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + groups=self.groups, + padding_mode="zeros", + weight_attr=weight_attr, + bias_attr=bias_attr, + data_format=self.data_format, + )(x_var) + feed_dict = {"input": self.input} + exe = base.Executor(place) + exe.run(start) + (y_np,) = exe.run(main, feed=feed_dict, fetch_list=[y_var]) + return y_np + + def functional(self, place): + main = base.Program() + start = base.Program() + with ( + base.unique_name.guard(), + base.program_guard(main, start), + ): + input_shape = ( + (-1, -1, -1, -1, self.num_channels) + if self.channel_last + else (-1, self.num_channels, -1, -1, -1) + ) + x_var = paddle.static.data("input", input_shape, dtype=self.dtype) + w_var = paddle.static.data("weight", self.weight_shape, dtype=self.dtype) + if not self.no_bias: + b_var = paddle.static.data( + "bias", (self.num_filters,), dtype=self.dtype + ) + else: + b_var = None + y_var = F.conv3d( + x_var, + w_var, + b_var, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format, + ) + 
feed_dict = {"input": self.input, "weight": self.weight} + if self.bias is not None: + feed_dict["bias"] = self.bias + exe = base.Executor(place) + exe.run(start) + (y_np,) = exe.run(main, feed=feed_dict, fetch_list=[y_var]) + return y_np + + def paddle_nn_layer(self): + x_var = paddle.to_tensor(self.input) + x_var.stop_gradient = False + conv = nn.Conv3D( + self.num_channels, + self.num_filters, + self.filter_size, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format, + ) + conv.weight.set_value(self.weight) + if not self.no_bias: + conv.bias.set_value(self.bias) + y_var = conv(x_var) + y_var.backward() + y_np = y_var.numpy() + t1 = x_var.gradient() + return y_np, t1 + + def _test_pir_equivalence(self, place): + with paddle.pir_utils.IrGuard(): + result1 = self.base_layer(place) + result2 = self.functional(place) + with dg.guard(place): + result3, g1 = self.paddle_nn_layer() + np.testing.assert_array_almost_equal(result1, result2) + np.testing.assert_array_almost_equal(result2, result3) + + def runTest(self): + place = base.CPUPlace() + self._test_pir_equivalence(place) + + if base.core.is_compiled_with_cuda() or is_custom_device(): + place = get_device_place() + self._test_pir_equivalence(place) + + +class Conv3DErrorTestCase(Conv3DTestCase): + def runTest(self): + place = base.CPUPlace() + with ( + dg.guard(place), + self.assertRaises(ValueError), + ): + self.paddle_nn_layer() + + +def add_cases(suite): + suite.addTest(Conv3DTestCase(methodName="runTest")) + suite.addTest(Conv3DTestCase(methodName="runTest", stride=[1, 2, 1], dilation=2)) + suite.addTest(Conv3DTestCase(methodName="runTest", stride=2, dilation=(2, 1, 2))) + suite.addTest(Conv3DTestCase(methodName="runTest", padding="same", no_bias=True)) + suite.addTest( + Conv3DTestCase(methodName="runTest", filter_size=(3, 2, 3), padding="valid") + ) + suite.addTest(Conv3DTestCase(methodName="runTest", padding=(2, 3, 1))) + suite.addTest(Conv3DTestCase(methodName="runTest", padding=[1, 2, 2, 1, 2, 3])) + suite.addTest( + Conv3DTestCase( + methodName="runTest", + padding=[[0, 0], [0, 0], [1, 2], [2, 1], [2, 2]], + ) + ) + suite.addTest(Conv3DTestCase(methodName="runTest", data_format="NDHWC")) + suite.addTest( + Conv3DTestCase( + methodName="runTest", + data_format="NDHWC", + padding=[[0, 0], [1, 1], [3, 3], [2, 2], [0, 0]], + ) + ) + suite.addTest(Conv3DTestCase(methodName="runTest", groups=2, padding="valid")) + suite.addTest( + Conv3DTestCase( + methodName="runTest", + num_filters=6, + num_channels=3, + groups=3, + padding="valid", + ) + ) + + +def add_error_cases(suite): + suite.addTest(Conv3DErrorTestCase(methodName="runTest", num_channels=5, groups=2)) + suite.addTest( + Conv3DErrorTestCase( + methodName="runTest", num_channels=5, groups=2, padding=[-1, 1, 3] + ) + ) + + +def load_tests(loader, standard_tests, pattern): + suite = unittest.TestSuite() + add_cases(suite) + add_error_cases(suite) + return suite + + +def get_places(): + places = [] + if core.is_compiled_with_xpu(): + places.append(paddle.device.XPUPlace(0)) + elif core.is_compiled_with_cuda(): + places.append(paddle.CUDAPlace(0)) + places.append(paddle.CPUPlace()) + return places + + +class TestConv3dAPI_Compatibility(unittest.TestCase): + def setUp(self): + np.random.seed(2025) + self.places = get_places() + self.shape_x = [2, 3, 8, 8, 8] # NCDHW + self.shape_w = [6, 3, 3, 3, 3] # Co, Cin, kD, kH, kW + self.dtype = "float32" + self.init_data() + + def init_data(self): + self.np_x = 
np.random.rand(*self.shape_x).astype(self.dtype) + self.np_w = np.random.rand(*self.shape_w).astype(self.dtype) + conv_param = { + "stride": [1, 1, 1], + "pad": [0, 0, 0], + "dilation": [1, 1, 1], + } + self.np_ref_out = conv3d_forward_naive(self.np_x, self.np_w, 1, conv_param) + + def test_dygraph_Compatibility(self): + for place in self.places: + paddle.device.set_device(place) + paddle.disable_static() + x = paddle.to_tensor(self.np_x) + w = paddle.to_tensor(self.np_w) + + paddle_dygraph_out = [] + # Position args (args) + out1 = paddle.nn.functional.conv3d(x, w) + paddle_dygraph_out.append(out1) + # Key words args (kwargs) for paddle + out2 = paddle.nn.functional.conv3d(x=x, weight=w) + paddle_dygraph_out.append(out2) + # Key words args for alias compatibility + out3 = paddle.nn.functional.conv3d(input=x, weight=w) + paddle_dygraph_out.append(out3) + # Combined args and kwargs + out4 = paddle.nn.functional.conv3d(x, weight=w) + paddle_dygraph_out.append(out4) + + # refer to test/xpu/test_conv3d_op_xpu.py + if isinstance(place, core.XPUPlace): + rtol = 5e-3 + atol = 5e-3 + else: + rtol = 1e-5 + atol = 0 + + # Check all dygraph results against reference + for out in paddle_dygraph_out: + np.testing.assert_allclose( + self.np_ref_out, out.numpy(), rtol=rtol, atol=atol + ) + paddle.enable_static() + + def test_static_Compatibility(self): + paddle.enable_static() + + fetch_list = [] + main = paddle.static.Program() + startup = paddle.static.Program() + with base.program_guard(main, startup): + x = paddle.static.data(name="x", shape=self.shape_x, dtype=self.dtype) + w = paddle.static.data(name="w", shape=self.shape_w, dtype=self.dtype) + + # Position args (args) + out1 = paddle.nn.functional.conv3d(x, w) + fetch_list.append(out1) + # Key words args (kwargs) for paddle + out2 = paddle.nn.functional.conv3d(x=x, weight=w) + fetch_list.append(out2) + # Key words args for alias compatibility + out3 = paddle.nn.functional.conv3d(input=x, weight=w) + fetch_list.append(out3) + # Combined args and kwargs + out4 = paddle.nn.functional.conv3d(x, weight=w) + fetch_list.append(out4) + + for place in self.places: + # refer to test/xpu/test_conv2d_op_xpu.py + if isinstance(place, core.XPUPlace): + rtol = 5e-3 + atol = 5e-3 + else: + rtol = 1e-5 + atol = 0 + + exe = base.Executor(place) + fetches = exe.run( + main, + feed={"x": self.np_x, "w": self.np_w}, + fetch_list=fetch_list, + ) + for out in fetches: + np.testing.assert_allclose( + out, self.np_ref_out, rtol=rtol, atol=atol + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/backends/metax_gpu/tests/unit_test/test_conv3d_transpose_op_metax.py b/backends/metax_gpu/tests/unit_test/test_conv3d_transpose_op_metax.py new file mode 100644 index 00000000000..6f55aac3361 --- /dev/null +++ b/backends/metax_gpu/tests/unit_test/test_conv3d_transpose_op_metax.py @@ -0,0 +1,764 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
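+
+# NOTE: this file appears to mirror Paddle's legacy test_conv3d_transpose_op
+# suite for the metax_gpu backend. It builds a NumPy reference
+# (conv3dtranspose_forward_naive), turns the cuDNN switch off via
+# core.set_cudnn_switch(False), and then registers the usual FP16/BF16
+# variants through create_test_cudnn_fp16_class / create_test_cudnn_bf16_class.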
+ +import unittest + +import numpy as np + +import paddle + +paddle.enable_static() +from op_test import ( + OpTest, + copy_bits_from_float_to_uint16, + get_device_place, + is_custom_device, +) + +from paddle.base import core + +core.set_cudnn_switch(False) + + +def convert_float_to_uint16(float_list, data_format="NCHW"): + if data_format == "NHWC": + float_list = np.transpose(float_list, [0, 4, 1, 2, 3]) + + new_output = [] + for x in np.nditer(float_list): + new_output.append(np.uint16(copy_bits_from_float_to_uint16(x))) + new_output = np.reshape(new_output, float_list.shape).view(np.uint16) + + if data_format == "NHWC": + new_output = np.transpose(new_output, [0, 2, 3, 4, 1]) + return new_output + + +def conv3dtranspose_forward_naive(input_, filter_, attrs): + padding_algorithm = attrs["padding_algorithm"] + if padding_algorithm not in ["SAME", "VALID", "EXPLICIT"]: + raise ValueError( + f"Unknown Attr(padding_algorithm): '{padding_algorithm}'. " + "It can only be 'SAME' or 'VALID'." + ) + + if attrs["data_format"] == "NHWC": + input_ = np.transpose(input_, [0, 4, 1, 2, 3]) + in_n, in_c, in_d, in_h, in_w = input_.shape + f_c, f_out_c, f_d, f_h, f_w = filter_.shape + groups = attrs["groups"] + assert in_c == f_c + out_c = f_out_c * groups + sub_in_c = in_c // groups + + stride, pad, dilations = ( + attrs["strides"], + attrs["paddings"], + attrs["dilations"], + ) + + def _get_padding_with_SAME(input_shape, kernel_size, kernel_stride): + padding = [] + for input_size, filter_size, stride_size in zip( + input_shape, kernel_size, kernel_stride + ): + out_size = int((input_size + stride_size - 1) / stride_size) + pad_sum = np.max( + ((out_size - 1) * stride_size + filter_size - input_size, 0) + ) + pad_0 = int(pad_sum / 2) + pad_1 = int(pad_sum - pad_0) + padding.append(pad_0) + padding.append(pad_1) + return padding + + ksize = filter_.shape[2:5] + if padding_algorithm == "VALID": + pad = [0, 0, 0, 0, 0, 0] + elif padding_algorithm == "SAME": + dilations = [1, 1, 1] + input_data_shape = input_.shape[2:5] + pad = _get_padding_with_SAME(input_data_shape, ksize, stride) + + pad_d_0, pad_d_1 = pad[0], pad[0] + pad_h_0, pad_h_1 = pad[1], pad[1] + pad_w_0, pad_w_1 = pad[2], pad[2] + if len(pad) == 6: + pad_d_0, pad_d_1 = pad[0], pad[1] + pad_h_0, pad_h_1 = pad[2], pad[3] + pad_w_0, pad_w_1 = pad[4], pad[5] + + d_block_d = dilations[0] * (f_d - 1) + 1 + d_block_h = dilations[1] * (f_h - 1) + 1 + d_block_w = dilations[2] * (f_w - 1) + 1 + out_d = (in_d - 1) * stride[0] + d_block_d + out_h = (in_h - 1) * stride[1] + d_block_h + out_w = (in_w - 1) * stride[2] + d_block_w + out = np.zeros((in_n, out_c, out_d, out_h, out_w)) + + for n in range(in_n): + for d in range(in_d): + for i in range(in_h): + for j in range(in_w): + for g in range(groups): + input_masked = input_[ + n, g * sub_in_c : (g + 1) * sub_in_c, d, i, j + ] # (c) + input_masked = np.reshape(input_masked, (sub_in_c, 1, 1, 1)) + input_masked = np.tile(input_masked, (1, f_d, f_h, f_w)) + + for k in range(f_out_c): + tmp_out = np.sum( + input_masked + * filter_[ + g * sub_in_c : (g + 1) * sub_in_c, + k, + :, + :, + :, + ], + axis=0, + ) + d1, d2 = d * stride[0], d * stride[0] + d_block_d + i1, i2 = i * stride[1], i * stride[1] + d_block_h + j1, j2 = j * stride[2], j * stride[2] + d_block_w + out[ + n, + g * f_out_c + k, + d1 : d2 : dilations[0], + i1 : i2 : dilations[1], + j1 : j2 : dilations[2], + ] += tmp_out + + out = out[ + :, + :, + pad_d_0 : out_d - pad_d_1, + pad_h_0 : out_h - pad_h_1, + pad_w_0 : out_w - pad_w_1, + ] + if 
attrs["data_format"] == "NHWC": + out = np.transpose(out, [0, 2, 3, 4, 1]) + return out + + +def create_test_cudnn_fp16_class(parent, grad_check=True): + @unittest.skipIf( + not (core.is_compiled_with_cuda() or is_custom_device()), + "core is not compiled with CUDA", + ) + class TestConv3DTransposeCUDNNFP16(parent): + def init_kernel_type(self): + self.use_cudnn = True + self.dtype = np.float16 + + def test_check_output(self): + if core.is_compiled_with_cuda() or is_custom_device(): + place = get_device_place() + if core.is_float16_supported(place): + self.check_output_with_place(place, atol=2e-2) + + def test_check_grad_no_filter(self): + place = get_device_place() + if core.is_float16_supported(place) and grad_check: + self.check_grad_with_place( + place, ["Input"], "Output", no_grad_set={"Filter"} + ) + + def test_check_grad_no_input(self): + place = get_device_place() + if core.is_float16_supported(place) and grad_check: + self.check_grad_with_place( + place, ["Filter"], "Output", no_grad_set={"Input"} + ) + + cls_name = "{}_{}".format(parent.__name__, "CUDNNFP16OP") + TestConv3DTransposeCUDNNFP16.__name__ = cls_name + globals()[cls_name] = TestConv3DTransposeCUDNNFP16 + + +def create_test_cudnn_bf16_class(parent): + @unittest.skipIf( + not (core.is_compiled_with_cuda() or is_custom_device()) + or not core.is_bfloat16_supported(get_device_place()), + "core is not compiled with CUDA and do not support bfloat16", + ) + class TestConv3DTransposeCUDNNBF16(parent): + def init_kernel_type(self): + self.use_cudnn = True + self.dtype = np.uint16 + + def test_check_output(self): + place = get_device_place() + self.check_output_with_place(place) + + def test_check_grad(self): + place = get_device_place() + self.check_grad_with_place( + place, + {"Input", "Filter"}, + "Output", + ) + + def test_check_grad_no_filter(self): + place = get_device_place() + self.check_grad_with_place( + place, + ["Input"], + "Output", + no_grad_set={"Filter"}, + ) + + def test_check_grad_no_input(self): + place = get_device_place() + self.check_grad_with_place( + place, + ["Filter"], + "Output", + no_grad_set={"Input"}, + ) + + cls_name = "{}_{}".format(parent.__name__, "CUDNNBF16OP") + TestConv3DTransposeCUDNNBF16.__name__ = cls_name + globals()[cls_name] = TestConv3DTransposeCUDNNBF16 + + +def conv3d_transpose_wrapper( + x, + weight, + stride=1, + padding=0, + output_padding=[], + output_size=[], + padding_algorithm="EXPLICIT", + groups=1, + dilation=1, + data_format="NCDHW", +): + if data_format == "AnyLayout": + data_format = "NCDHW" + return paddle._C_ops.conv3d_transpose( + x, + weight, + stride, + padding, + output_padding, + output_size, + padding_algorithm, + groups, + dilation, + data_format, + ) + + +class TestConv3DTransposeOp(OpTest): + def setUp(self): + # init as conv transpose + self.use_cudnn = False + self.check_no_input = False + self.check_no_filter = False + self.data_format = "NCHW" + self.pad = [0, 0, 0] + self.padding_algorithm = "EXPLICIT" + self.init_op_type() + self.init_kernel_type() + self.init_test_case() + + if self.is_bfloat16_op(): + input = np.random.random(self.input_size).astype(np.float32) + filter = np.random.random(self.filter_size).astype(np.float32) + else: + input = np.random.random(self.input_size).astype(self.dtype) + filter = np.random.random(self.filter_size).astype(self.dtype) + + self.attrs = { + "strides": self.stride, + "paddings": self.pad, + "padding_algorithm": self.padding_algorithm, + "dilations": self.dilations, + "groups": self.groups, + "use_cudnn": 
self.use_cudnn, + "data_format": self.data_format, + } + + output = conv3dtranspose_forward_naive(input, filter, self.attrs).astype( + "float32" + ) + + if self.is_bfloat16_op(): + self.inputs = { + "Input": convert_float_to_uint16(input), + "Filter": convert_float_to_uint16(filter), + } + else: + self.inputs = { + "Input": input, + "Filter": filter, + } + output = output.astype(self.dtype) + + self.outputs = {"Output": output} + + def test_check_output(self): + if self.use_cudnn: + place = get_device_place() + self.check_output_with_place(place, atol=1e-5) + else: + self.check_output() + + def test_check_grad(self): + if self.use_cudnn: + place = get_device_place() + self.check_grad_with_place( + place, + {"Input", "Filter"}, + "Output", + max_relative_error=0.03, + ) + else: + self.check_grad({"Input", "Filter"}, "Output", max_relative_error=0.03) + + def test_check_grad_no_filter(self): + if self.use_cudnn: + place = get_device_place() + self.check_grad_with_place( + place, + ["Input"], + "Output", + max_relative_error=0.03, + no_grad_set={"Filter"}, + ) + elif self.check_no_filter: + self.check_grad( + ["Input"], + "Output", + max_relative_error=0.03, + no_grad_set={"Filter"}, + ) + + def test_check_grad_no_input(self): + if self.use_cudnn: + place = get_device_place() + self.check_grad_with_place( + place, + ["Filter"], + "Output", + max_relative_error=0.03, + no_grad_set={"Input"}, + ) + elif self.check_no_input: + self.check_grad( + ["Filter"], + "Output", + max_relative_error=0.03, + no_grad_set={"Input"}, + ) + + def init_test_case(self): + self.pad = [0, 0, 0] + self.stride = [1, 1, 1] + self.dilations = [1, 1, 1] + self.groups = 1 + self.input_size = [2, 3, 5, 5, 5] # NCDHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 3, 3] + + def init_op_type(self): + self.op_type = "conv3d_transpose" + self.python_api = conv3d_transpose_wrapper + + def init_kernel_type(self): + self.dtype = np.float32 + + +class TestWithSymmetricPad(TestConv3DTransposeOp): + def init_test_case(self): + self.check_no_input = True + self.pad = [1, 1, 1] + self.stride = [1, 1, 1] + self.dilations = [1, 1, 1] + self.groups = 1 + self.input_size = [1, 2, 5, 5, 5] # NCDHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 3, 3] + + +class TestWithAsymmetricPad(TestConv3DTransposeOp): + def init_test_case(self): + self.pad = [1, 0, 1, 0, 1, 2] + self.stride = [1, 1, 1] + self.dilations = [1, 1, 1] + self.groups = 1 + self.input_size = [1, 2, 5, 5, 5] # NCDHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 3, 3] + + +class TestWithSAMEPad(TestConv3DTransposeOp): + def init_test_case(self): + self.stride = [1, 1, 2] + self.dilations = [1, 2, 1] + self.groups = 1 + self.input_size = [1, 2, 5, 5, 6] # NCDHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 3, 4] + self.padding_algorithm = "SAME" + + +class TestWithVALIDPad(TestConv3DTransposeOp): + def init_test_case(self): + self.stride = [2, 1, 1] + self.dilations = [1, 1, 1] + self.groups = 1 + self.input_size = [1, 2, 5, 5, 5] # NCDHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 4, 3] + self.padding_algorithm = "VALID" + + +class TestWithStride(TestConv3DTransposeOp): + def init_test_case(self): + self.check_no_filter = True + self.pad = [1, 1, 1] + self.stride = [2, 2, 2] + self.dilations = [1, 1, 1] + self.groups = 1 + self.input_size = [1, 2, 5, 5, 5] # NCDHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 3, 3] + + +class TestWithGroups(TestConv3DTransposeOp): + def init_test_case(self): + 
self.pad = [1, 1, 1] + self.stride = [1, 1, 1] + self.dilations = [1, 1, 1] + self.groups = 2 + self.input_size = [1, 2, 5, 5, 5] # NCHW + f_c = self.input_size[1] + self.filter_size = [f_c, 3, 3, 3, 3] + + +class TestWithDilation(TestConv3DTransposeOp): + def init_test_case(self): + self.pad = [1, 1, 1] + self.stride = [1, 1, 1] + self.dilations = [2, 2, 2] + self.groups = 1 + self.input_size = [1, 2, 5, 5, 5] # NCDHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 3, 3] + + +class Test_NHWC(TestConv3DTransposeOp): + def init_test_case(self): + self.pad = [0, 0, 0] + self.stride = [1, 1, 1] + self.dilations = [1, 1, 1] + self.groups = 1 + self.input_size = [1, 5, 5, 5, 2] # NDHWC + f_c = self.input_size[-1] + self.filter_size = [f_c, 6, 3, 3, 3] + self.data_format = "NHWC" + + +# ------------ test_cudnn ------------ +@unittest.skipIf( + not (core.is_compiled_with_cuda() or is_custom_device()), + "core is not compiled with CUDA", +) +class TestCUDNN(TestConv3DTransposeOp): + def init_op_type(self): + self.use_cudnn = True + self.op_type = "conv3d_transpose" + self.python_api = conv3d_transpose_wrapper + + +@unittest.skipIf( + not (core.is_compiled_with_cuda() or is_custom_device()), + "core is not compiled with CUDA", +) +class TestCUDNNWithSymmetricPad(TestWithSymmetricPad): + def init_test_case(self): + self.pad = [1, 1, 1] + self.stride = [1, 1, 1] + self.dilations = [1, 1, 1] + self.groups = 1 + self.input_size = [1, 2, 5, 5, 5] # NCDHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 3, 3] + + def init_op_type(self): + self.use_cudnn = True + self.op_type = "conv3d_transpose" + self.python_api = conv3d_transpose_wrapper + + +@unittest.skipIf( + not (core.is_compiled_with_cuda() or is_custom_device()), + "core is not compiled with CUDA", +) +class TestCUDNNWithAsymmetricPad(TestWithAsymmetricPad): + def init_test_case(self): + self.pad = [1, 1, 1, 0, 0, 2] + self.stride = [1, 1, 1] + self.dilations = [1, 1, 1] + self.groups = 1 + self.input_size = [1, 2, 4, 4, 4] # NCDHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 3, 3] + + def init_op_type(self): + self.use_cudnn = True + self.op_type = "conv3d_transpose" + self.python_api = conv3d_transpose_wrapper + + +@unittest.skipIf( + not (core.is_compiled_with_cuda() or is_custom_device()), + "core is not compiled with CUDA", +) +class TestCUDNNWithSAMEPad(TestWithSAMEPad): + def init_test_case(self): + self.stride = [1, 1, 2] + self.dilations = [1, 2, 1] + self.groups = 1 + self.input_size = [1, 2, 5, 5, 5] # NCDHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 4, 3] + self.padding_algorithm = "SAME" + + def init_op_type(self): + self.use_cudnn = True + self.op_type = "conv3d_transpose" + self.python_api = conv3d_transpose_wrapper + + +@unittest.skipIf( + not (core.is_compiled_with_cuda() or is_custom_device()), + "core is not compiled with CUDA", +) +class TestCUDNNWithVALIDPad(TestWithVALIDPad): + def init_test_case(self): + self.stride = [1, 1, 1] + self.dilations = [1, 1, 1] + self.groups = 1 + self.input_size = [1, 2, 5, 5, 5] # NCDHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 3, 3] + self.padding_algorithm = "VALID" + + def init_op_type(self): + self.use_cudnn = True + self.op_type = "conv3d_transpose" + self.python_api = conv3d_transpose_wrapper + + +@unittest.skipIf( + not (core.is_compiled_with_cuda() or is_custom_device()), + "core is not compiled with CUDA", +) +class TestCUDNNWithStride(TestWithStride): + def init_test_case(self): + self.pad = [1, 1, 1] + 
self.stride = [2, 2, 2] + self.dilations = [1, 1, 1] + self.groups = 1 + self.input_size = [1, 2, 5, 5, 5] # NCDHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 3, 3] + + def init_op_type(self): + self.use_cudnn = True + self.op_type = "conv3d_transpose" + self.python_api = conv3d_transpose_wrapper + + +@unittest.skipIf( + not (core.is_compiled_with_cuda() or is_custom_device()), + "core is not compiled with CUDA", +) +class TestCUDNNWithGroups(TestWithGroups): + def init_test_case(self): + self.pad = [1, 1, 1] + self.stride = [1, 1, 1] + self.dilations = [1, 1, 1] + self.groups = 2 + self.input_size = [1, 2, 5, 5, 5] # NCHW + f_c = self.input_size[1] + self.filter_size = [f_c, 3, 3, 3, 3] + + def init_op_type(self): + self.use_cudnn = True + self.op_type = "conv3d_transpose" + self.python_api = conv3d_transpose_wrapper + + # Please Don't remove the following code. + # Currently, CI use cudnn V5.0 which not support dilation conv. + # class TestCUDNNWithDilation(TestWithDilation): + # def init_test_case(self): + # self.pad = [1, 1, 1] + # self.stride = [2, 2, 2] + # self.dilations = [2, 2, 2] + # self.input_size = [2, 3, 5, 5, 5] # NCDHW + # f_c = self.input_size[1] + # self.filter_size = [f_c, 6, 3, 3, 3] + # + # def init_op_type(self): + # self.op_type = "conv3d_transpose" + self.python_api = conv3d_transpose_wrapper + + +@unittest.skipIf( + not (core.is_compiled_with_cuda() or is_custom_device()), + "core is not compiled with CUDA", +) +class TestCUDNN_NHWC(TestConv3DTransposeOp): + def init_test_case(self): + self.pad = [0, 0, 0] + self.stride = [1, 1, 1] + self.dilations = [1, 1, 1] + self.groups = 1 + self.input_size = [1, 5, 5, 5, 2] # NDHWC + f_c = self.input_size[-1] + self.filter_size = [f_c, 6, 3, 3, 3] + self.data_format = "NHWC" + + def init_op_type(self): + self.use_cudnn = True + self.op_type = "conv3d_transpose" + self.python_api = conv3d_transpose_wrapper + + +@unittest.skipIf( + not (core.is_compiled_with_cuda() or is_custom_device()), + "core is not compiled with CUDA", +) +class TestCUDNNWithSymmetricPad_NHWC(TestWithSymmetricPad): + def init_test_case(self): + self.pad = [1, 1, 1] + self.stride = [1, 1, 1] + self.dilations = [1, 1, 1] + self.groups = 1 + self.input_size = [1, 5, 5, 5, 2] # NDHWC + f_c = self.input_size[-1] + self.filter_size = [f_c, 6, 3, 3, 3] + self.data_format = "NHWC" + + def init_op_type(self): + self.use_cudnn = True + self.op_type = "conv3d_transpose" + self.python_api = conv3d_transpose_wrapper + + +@unittest.skipIf( + not (core.is_compiled_with_cuda() or is_custom_device()), + "core is not compiled with CUDA", +) +class TestCUDNNWithAsymmetricPad_NHWC(TestWithAsymmetricPad): + def init_test_case(self): + self.pad = [1, 0, 1, 0, 0, 2] + self.stride = [1, 1, 1] + self.dilations = [1, 1, 1] + self.groups = 1 + self.input_size = [1, 5, 5, 5, 2] # NDHWC + f_c = self.input_size[-1] + self.filter_size = [f_c, 6, 3, 3, 3] + self.data_format = "NHWC" + + def init_op_type(self): + self.use_cudnn = True + self.op_type = "conv3d_transpose" + self.python_api = conv3d_transpose_wrapper + + +@unittest.skipIf( + not (core.is_compiled_with_cuda() or is_custom_device()), + "core is not compiled with CUDA", +) +class TestCUDNNWithStride_NHWC(TestWithStride): + def init_test_case(self): + self.pad = [1, 1, 1] + self.stride = [2, 2, 2] + self.dilations = [1, 1, 1] + self.groups = 1 + self.input_size = [1, 5, 5, 5, 2] # NDHWC + f_c = self.input_size[-1] + self.filter_size = [f_c, 6, 3, 3, 3] + self.data_format = "NHWC" + + def init_op_type(self): + 
self.use_cudnn = True + self.op_type = "conv3d_transpose" + self.python_api = conv3d_transpose_wrapper + + +@unittest.skipIf( + not (core.is_compiled_with_cuda() or is_custom_device()), + "core is not compiled with CUDA", +) +class TestCUDNNWithGroups_NHWC(TestWithGroups): + def init_test_case(self): + self.pad = [1, 1, 1] + self.stride = [1, 1, 1] + self.dilations = [1, 1, 1] + self.groups = 2 + self.input_size = [1, 5, 5, 5, 2] # NDHWC + f_c = self.input_size[-1] + self.filter_size = [f_c, 3, 3, 3, 3] + self.data_format = "NHWC" + + def init_op_type(self): + self.use_cudnn = True + self.op_type = "conv3d_transpose" + self.python_api = conv3d_transpose_wrapper + + +# ----------------Conv3DTransposeCUDNN fp16---------------- +create_test_cudnn_fp16_class(TestConv3DTransposeOp) +create_test_cudnn_fp16_class(TestWithSymmetricPad) +create_test_cudnn_fp16_class(TestWithAsymmetricPad) +create_test_cudnn_fp16_class(TestWithSAMEPad) +create_test_cudnn_fp16_class(TestWithVALIDPad) +create_test_cudnn_fp16_class(TestWithStride) +create_test_cudnn_fp16_class(TestWithGroups) +create_test_cudnn_fp16_class(TestWithDilation) +create_test_cudnn_fp16_class(Test_NHWC) + + +# ----------------Conv3DTransposeCUDNN bf16---------------- +create_test_cudnn_bf16_class(TestConv3DTransposeOp) +create_test_cudnn_bf16_class(TestWithSymmetricPad) +create_test_cudnn_bf16_class(TestWithAsymmetricPad) +create_test_cudnn_bf16_class(TestWithSAMEPad) +create_test_cudnn_bf16_class(TestWithVALIDPad) +create_test_cudnn_bf16_class(TestWithStride) +create_test_cudnn_bf16_class(TestWithGroups) +create_test_cudnn_bf16_class(TestWithDilation) +create_test_cudnn_bf16_class(Test_NHWC) + + +class TestConv3dTranspose(unittest.TestCase): + def error_weight_input(self): + array = np.array([1], dtype=np.float32) + x = paddle.to_tensor(np.reshape(array, [1, 1, 1, 1, 1]), dtype="float32") + weight = paddle.to_tensor(np.reshape(array, [1]), dtype="float32") + paddle.nn.functional.conv3d_transpose(x, weight, bias=0) + + def test_type_error(self): + self.assertRaises(ValueError, self.error_weight_input) + + +if __name__ == "__main__": + unittest.main() diff --git a/backends/metax_gpu/tests/unit_test/test_conv3d_transpose_part2_op_metax.py b/backends/metax_gpu/tests/unit_test/test_conv3d_transpose_part2_op_metax.py new file mode 100644 index 00000000000..9bf91f5908f --- /dev/null +++ b/backends/metax_gpu/tests/unit_test/test_conv3d_transpose_part2_op_metax.py @@ -0,0 +1,108 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
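+
+# NOTE: this "part2" file appears to extend conv3d_transpose coverage with
+# NHWC layouts. It reuses TestConv3DTransposeOp and the FP16/BF16 class
+# factories from legacy_test/test_conv3d_transpose_op (made importable via the
+# sys.path.append below), and likewise disables the cuDNN switch first.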
+ +import sys +import unittest + +sys.path.append("../../legacy_test") +from test_conv3d_transpose_op import ( + TestConv3DTransposeOp, + create_test_cudnn_bf16_class, + create_test_cudnn_fp16_class, +) + +from paddle.base import core + +core.set_cudnn_switch(False) + + +class TestWithSymmetricPad_NHWC(TestConv3DTransposeOp): + def init_test_case(self): + self.pad = [1, 1, 1] + self.stride = [1, 1, 1] + self.dilations = [1, 1, 1] + self.groups = 1 + self.input_size = [2, 5, 5, 5, 3] # NDHWC + f_c = self.input_size[-1] + self.filter_size = [f_c, 6, 3, 3, 3] + self.data_format = "NHWC" + + +class TestWithAsymmetricPad_NHWC(TestConv3DTransposeOp): + def init_test_case(self): + self.pad = [1, 0, 1, 0, 1, 2] + self.stride = [1, 1, 1] + self.dilations = [1, 1, 1] + self.groups = 1 + self.input_size = [2, 5, 5, 5, 3] # NDHWC + f_c = self.input_size[-1] + self.filter_size = [f_c, 6, 3, 3, 3] + self.data_format = "NHWC" + + +class TestWithGroups_NHWC(TestConv3DTransposeOp): + def init_test_case(self): + self.check_no_filter = True + self.pad = [1, 1, 1] + self.stride = [1, 1, 1] + self.dilations = [1, 1, 1] + self.groups = 2 + self.input_size = [2, 5, 5, 5, 4] # NDHWC + f_c = self.input_size[-1] + self.filter_size = [f_c, 3, 3, 3, 3] + self.data_format = "NHWC" + + +class TestWithStride_NHWC(TestConv3DTransposeOp): + def init_test_case(self): + self.pad = [1, 1, 1] + self.stride = [2, 2, 2] + self.dilations = [1, 1, 1] + self.groups = 1 + self.input_size = [2, 5, 5, 5, 3] # NCDHW + f_c = self.input_size[-1] + self.filter_size = [f_c, 6, 3, 3, 3] + self.data_format = "NHWC" + + +class TestWithDilation_NHWC(TestConv3DTransposeOp): + def init_test_case(self): + self.check_no_input = True + self.pad = [1, 1, 1] + self.stride = [1, 1, 1] + self.dilations = [2, 2, 2] + self.groups = 1 + self.input_size = [2, 5, 5, 5, 3] # NCDHW + f_c = self.input_size[-1] + self.filter_size = [f_c, 6, 3, 3, 3] + self.data_format = "NHWC" + + +# ----------------Conv3DTransposeCUDNN fp16---------------- +create_test_cudnn_fp16_class(TestWithSymmetricPad_NHWC) +create_test_cudnn_fp16_class(TestWithAsymmetricPad_NHWC) +create_test_cudnn_fp16_class(TestWithGroups_NHWC) +create_test_cudnn_fp16_class(TestWithStride_NHWC) +create_test_cudnn_fp16_class(TestWithDilation_NHWC) + + +# ----------------Conv3DTransposeCUDNN bf16---------------- +create_test_cudnn_bf16_class(TestWithSymmetricPad_NHWC) +create_test_cudnn_bf16_class(TestWithAsymmetricPad_NHWC) +create_test_cudnn_bf16_class(TestWithGroups_NHWC) +create_test_cudnn_bf16_class(TestWithStride_NHWC) +create_test_cudnn_bf16_class(TestWithDilation_NHWC) + +if __name__ == "__main__": + unittest.main() diff --git a/backends/metax_gpu/tests/unit_test/test_deform_conv2d_metax.py b/backends/metax_gpu/tests/unit_test/test_deform_conv2d_metax.py new file mode 100644 index 00000000000..da5eeb34d0b --- /dev/null +++ b/backends/metax_gpu/tests/unit_test/test_deform_conv2d_metax.py @@ -0,0 +1,323 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import unittest +from unittest import TestCase + +import numpy as np +from op_test import get_device_place, is_custom_device + +import paddle +import paddle.nn.initializer as I + +from paddle.base import core + +core.set_cublas_switch(False) + + +class TestDeformConv2D(TestCase): + batch_size = 4 + spatial_shape = (5, 5) + dtype = "float32" + + def setUp(self): + self.in_channels = 2 + self.out_channels = 5 + self.kernel_size = [3, 3] + self.padding = [0, 0] + self.stride = [1, 1] + self.dilation = [1, 1] + self.deformable_groups = 1 + self.groups = 1 + self.no_bias = True + + def prepare(self): + np.random.seed(1) + paddle.seed(1) + if isinstance(self.kernel_size, int): + filter_shape = (self.kernel_size,) * 2 + else: + filter_shape = tuple(self.kernel_size) + self.filter_shape = filter_shape + + self.weight = np.random.uniform( + -1, + 1, + (self.out_channels, self.in_channels // self.groups, *filter_shape), + ).astype(self.dtype) + if not self.no_bias: + self.bias = np.random.uniform(-1, 1, (self.out_channels,)).astype( + self.dtype + ) + + def out_size(in_size, pad_size, dilation_size, kernel_size, stride_size): + return ( + in_size + 2 * pad_size - (dilation_size * (kernel_size - 1) + 1) + ) / stride_size + 1 + + out_h = int( + out_size( + self.spatial_shape[0], + self.padding[0], + self.dilation[0], + self.kernel_size[0], + self.stride[0], + ) + ) + out_w = int( + out_size( + self.spatial_shape[1], + self.padding[1], + self.dilation[1], + self.kernel_size[1], + self.stride[1], + ) + ) + out_shape = (out_h, out_w) + + self.input_shape = ( + self.batch_size, + self.in_channels, + *self.spatial_shape, + ) + + self.offset_shape = ( + self.batch_size, + self.deformable_groups * 2 * filter_shape[0] * filter_shape[1], + *out_shape, + ) + + self.mask_shape = ( + self.batch_size, + self.deformable_groups * filter_shape[0] * filter_shape[1], + *out_shape, + ) + + self.input = np.random.uniform(-1, 1, self.input_shape).astype(self.dtype) + + self.offset = np.random.uniform(-1, 1, self.offset_shape).astype(self.dtype) + + self.mask = np.random.uniform(-1, 1, self.mask_shape).astype(self.dtype) + + def static_graph_case_dcn(self): + main = paddle.static.Program() + start = paddle.static.Program() + paddle.enable_static() + with paddle.static.program_guard(main, start): + x = paddle.static.data( + "input", (-1, self.in_channels, -1, -1), dtype=self.dtype + ) + offset = paddle.static.data( + "offset", + ( + -1, + self.deformable_groups + * 2 + * self.filter_shape[0] + * self.filter_shape[1], + -1, + -1, + ), + dtype=self.dtype, + ) + mask = paddle.static.data( + "mask", + ( + -1, + self.deformable_groups + * self.filter_shape[0] + * self.filter_shape[1], + -1, + -1, + ), + dtype=self.dtype, + ) + + y_v1 = paddle.vision.ops.DeformConv2D( + in_channels=self.in_channels, + out_channels=self.out_channels, + kernel_size=self.filter_shape, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + groups=self.groups, + deformable_groups=self.deformable_groups, + weight_attr=I.Assign(self.weight), + bias_attr=False if self.no_bias else I.Assign(self.bias), + )(x, offset, None) + + y_v2 = paddle.vision.ops.DeformConv2D( + in_channels=self.in_channels, + out_channels=self.out_channels, + kernel_size=self.filter_shape, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + groups=self.groups, + deformable_groups=self.deformable_groups, + 
weight_attr=I.Assign(self.weight), + bias_attr=False if self.no_bias else I.Assign(self.bias), + )(x, offset, mask) + + exe = paddle.static.Executor(self.place) + exe.run(start) + out_v1, out_v2 = exe.run( + main, + feed={ + "input": self.input, + "offset": self.offset, + "mask": self.mask, + }, + fetch_list=[y_v1, y_v2], + ) + return out_v1, out_v2 + + def dygraph_case_dcn(self): + paddle.disable_static() + x = paddle.to_tensor(self.input) + offset = paddle.to_tensor(self.offset) + mask = paddle.to_tensor(self.mask) + + bias = None if self.no_bias else paddle.to_tensor(self.bias) + + deform_conv2d = paddle.vision.ops.DeformConv2D( + in_channels=self.in_channels, + out_channels=self.out_channels, + kernel_size=self.kernel_size, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + deformable_groups=self.deformable_groups, + groups=self.groups, + weight_attr=I.Assign(self.weight), + bias_attr=False if self.no_bias else I.Assign(self.bias), + ) + + y_v1 = deform_conv2d(x, offset) + y_v2 = deform_conv2d(x, offset, mask) + + out_v1 = y_v1.numpy() + out_v2 = y_v2.numpy() + + return out_v1, out_v2 + + def _test_identity(self): + self.prepare() + static_dcn_v1, static_dcn_v2 = self.static_graph_case_dcn() + dy_dcn_v1, dy_dcn_v2 = self.dygraph_case_dcn() + np.testing.assert_array_almost_equal(static_dcn_v1, dy_dcn_v1) + np.testing.assert_array_almost_equal(static_dcn_v2, dy_dcn_v2) + + def test_identity(self): + self.place = paddle.CPUPlace() + self._test_identity() + + if paddle.is_compiled_with_cuda() or is_custom_device(): + self.place = get_device_place() + self._test_identity() + + +# testcases for DeformConv2D +class TestDeformConv2DWithPadding(TestDeformConv2D): + def setUp(self): + self.in_channels = 3 + self.out_channels = 5 + self.kernel_size = [3, 3] + self.padding = [2, 2] + self.stride = [1, 1] + self.dilation = [1, 1] + self.deformable_groups = 1 + self.groups = 1 + self.no_bias = True + + +class TestDeformConv2DWithBias(TestDeformConv2D): + def setUp(self): + self.in_channels = 3 + self.out_channels = 5 + self.kernel_size = [3, 3] + self.padding = [2, 2] + self.stride = [1, 1] + self.dilation = [1, 1] + self.deformable_groups = 1 + self.groups = 1 + self.no_bias = False + + +class TestDeformConv2DWithAsynPadding(TestDeformConv2D): + def setUp(self): + self.in_channels = 3 + self.out_channels = 5 + self.kernel_size = [3, 3] + self.padding = [1, 2] + self.stride = [1, 1] + self.dilation = [1, 1] + self.deformable_groups = 1 + self.groups = 1 + self.no_bias = False + + +class TestDeformConv2DWithDilation(TestDeformConv2D): + def setUp(self): + self.in_channels = 3 + self.out_channels = 5 + self.kernel_size = [3, 3] + self.padding = [1, 1] + self.stride = [1, 1] + self.dilation = [3, 3] + self.deformable_groups = 1 + self.groups = 1 + self.no_bias = False + + +class TestDeformConv2DWithStride(TestDeformConv2D): + def setUp(self): + self.in_channels = 3 + self.out_channels = 5 + self.kernel_size = [3, 3] + self.padding = [1, 1] + self.stride = [2, 2] + self.dilation = [1, 1] + self.deformable_groups = 1 + self.groups = 1 + self.no_bias = False + + +class TestDeformConv2DWithDeformable_Groups(TestDeformConv2D): + def setUp(self): + self.in_channels = 5 + self.out_channels = 5 + self.kernel_size = [3, 3] + self.padding = [1, 1] + self.stride = [1, 1] + self.dilation = [1, 1] + self.deformable_groups = 5 + self.groups = 1 + self.no_bias = False + + +class TestDeformConv2DWithGroups(TestDeformConv2D): + def setUp(self): + self.in_channels = 5 + self.out_channels = 5 + 
self.kernel_size = [3, 3] + self.padding = [1, 1] + self.stride = [1, 1] + self.dilation = [1, 1] + self.deformable_groups = 1 + self.groups = 5 + self.no_bias = False + + +if __name__ == "__main__": + unittest.main() diff --git a/backends/metax_gpu/tests/unit_test/test_deformable_conv_op_metax.py b/backends/metax_gpu/tests/unit_test/test_deformable_conv_op_metax.py new file mode 100644 index 00000000000..1f26abb73f8 --- /dev/null +++ b/backends/metax_gpu/tests/unit_test/test_deformable_conv_op_metax.py @@ -0,0 +1,504 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +from itertools import product + +import numpy as np +from op_test import OpTest + +import paddle + +paddle.enable_static() + +from paddle.base import core + +core.set_cublas_switch(False) + + +def dmc_bilinear(data_im, height, width, h, w): + h_low = int(np.floor(h)) + w_low = int(np.floor(w)) + h_high = h_low + 1 + w_high = w_low + 1 + + lh = h - h_low + lw = w - w_low + hh = 1 - lh + hw = 1 - lw + + v1 = 0 + if h_low >= 0 and w_low >= 0: + v1 = data_im[h_low, w_low] + v2 = 0 + if h_low >= 0 and w_high <= width - 1: + v2 = data_im[h_low, w_high] + v3 = 0 + if h_high <= height - 1 and w_low >= 0: + v3 = data_im[h_high, w_low] + v4 = 0 + if h_high <= height - 1 and w_high <= width - 1: + v4 = data_im[h_high, w_high] + + w1, w2, w3, w4 = hh * hw, hh * lw, lh * hw, lh * lw + val = w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4 + + return val + + +def dconv_im2col_gemm(input, offset, mask, filter, group, conv_param): + in_n, in_c, in_h, in_w = input.shape + out_c, f_c, f_h, f_w = filter.shape + + assert offset.shape == (in_n, 2 * f_h * f_w, in_h, in_w) + assert mask.shape == (in_n, f_h * f_w, in_h, in_w) + assert f_c * group == in_c + assert np.mod(out_c, group) == 0 + + stride, pad, dilation = ( + conv_param["stride"], + conv_param["pad"], + conv_param["dilation"], + ) + out_h = 1 + (in_h + 2 * pad[0] - (dilation[0] * (f_h - 1) + 1)) // stride[0] + out_w = 1 + (in_w + 2 * pad[1] - (dilation[1] * (f_w - 1) + 1)) // stride[1] + assert out_h == in_h + assert out_w == in_w + + col_buffer = np.zeros((in_n, in_c * f_h * f_w, in_h * in_w)) + for n, c, h, w, kh, kw in product( + range(in_n), + range(in_c), + range(out_h), + range(out_w), + range(f_h), + range(f_w), + ): + offset_h_table = offset[n, ::2, h, w].reshape(f_h, f_w) + offset_w_table = offset[n, 1::2, h, w].reshape(f_h, f_w) + mask_table = mask[n, :, h, w].reshape(f_h, f_w) + offset_h = offset_h_table[kh, kw] + offset_w = offset_w_table[kh, kw] + val = 0 + im_h = h * stride[0] + kh * dilation[0] + offset_h - pad[0] + im_w = w * stride[0] + kw * dilation[0] + offset_w - pad[1] + if im_h > -1 and im_w > -1 and im_h < in_h and im_w < in_h: + val = dmc_bilinear(input[n, c], in_h, in_w, im_h, im_w) + val_out = val * mask_table[kh, kw] + col_buffer[n, c * f_h * f_w + kh * f_w + kw, h * in_w + w] = val_out + + out = np.zeros((in_n, group, int(out_c // group), out_h * out_w)) + weight = filter.reshape(group, 
int(out_c // group), f_c * f_h * f_w) + col_buffer = col_buffer.reshape( + (in_n, group, int(in_c // group * f_h * f_w), in_h * in_w) + ) + for n in range(in_n): + for g in range(group): + out[n, g] = np.matmul(weight[g], col_buffer[n, g]) + out = out.reshape(in_n, out_c, out_h, out_w) + return out + + +def deform_conv2d_wrapper( + x, + offset, + weight, + mask=None, + stride=1, + padding=0, + dilation=1, + deformable_groups=1, + groups=1, + im2col_step=1, +): + return paddle.vision.ops.deform_conv2d( + x, + offset, + weight, + None, + stride, + padding, + dilation, + deformable_groups, + groups, + mask, + ) + + +class TestModulatedDeformableConvOp(OpTest): + def setUp(self): + self.python_api = deform_conv2d_wrapper + self.op_type = "deformable_conv" + self.init_type() + self.init_group() + self.init_dilation() + self.init_test_case() + + conv_param = { + "stride": self.stride, + "pad": self.pad, + "dilation": self.dilations, + } + + input = np.random.random(self.input_size).astype(self.dtype) + offset = 10 * np.random.random(self.offset_size).astype(self.dtype) + mask = 10 * np.random.random(self.mask_size).astype(self.dtype) + filter = np.random.random(self.filter_size).astype(self.dtype) + + output = dconv_im2col_gemm(input, offset, mask, filter, self.groups, conv_param) + output = output.astype(self.dtype) + + self.inputs = { + "Input": OpTest.np_dtype_to_base_dtype(input), + "Offset": OpTest.np_dtype_to_base_dtype(offset), + "Mask": OpTest.np_dtype_to_base_dtype(mask), + "Filter": OpTest.np_dtype_to_base_dtype(filter), + } + self.attrs = { + "strides": self.stride, + "paddings": self.pad, + "groups": self.groups, + "deformable_groups": self.deformable_groups, + "im2col_step": self.im2col_step, + "dilations": self.dilations, + } + self.outputs = {"Output": output} + + def test_check_output(self): + self.check_output(check_pir=True) + + def test_check_grad(self): + self.check_grad( + {"Input", "Offset", "Mask", "Filter"}, + "Output", + max_relative_error=0.05, + check_pir=True, + ) + + def init_test_case(self): + self.pad = [1, 1] + self.stride = [1, 1] + self.dilations = [1, 1] + self.input_size = [2, 8, 4, 4] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [4, f_c, 3, 3] + self.im2col_step = 1 + self.deformable_groups = 1 + offset_c = ( + 2 * self.deformable_groups * self.filter_size[2] * self.filter_size[3] + ) + mask_c = self.deformable_groups * self.filter_size[2] * self.filter_size[3] + self.offset_size = [ + self.input_size[0], + offset_c, + self.input_size[2], + self.input_size[3], + ] + self.mask_size = [ + self.input_size[0], + mask_c, + self.input_size[2], + self.input_size[3], + ] + + def init_dilation(self): + self.dilations = [1, 1] + + def init_group(self): + self.groups = 1 + + def init_type(self): + self.dtype = np.float32 + + +class TestWithStride(TestModulatedDeformableConvOp): + def init_test_case(self): + self.pad = [3, 3] + self.stride = [2, 2] + self.input_size = [2, 3, 5, 5] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [6, f_c, 3, 3] + self.im2col_step = 1 + self.deformable_groups = 1 + offset_c = ( + 2 * self.deformable_groups * self.filter_size[2] * self.filter_size[3] + ) + mask_c = self.deformable_groups * self.filter_size[2] * self.filter_size[3] + self.offset_size = [ + self.input_size[0], + offset_c, + self.input_size[2], + self.input_size[3], + ] + self.mask_size = [ + self.input_size[0], + mask_c, + 
self.input_size[2], + self.input_size[3], + ] + + +class TestWithDilation(TestModulatedDeformableConvOp): + def init_test_case(self): + self.pad = [2, 2] + self.stride = [1, 1] + self.input_size = [4, 3, 4, 4] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [6, f_c, 3, 3] + self.im2col_step = 1 + self.deformable_groups = 1 + offset_c = ( + 2 * self.deformable_groups * self.filter_size[2] * self.filter_size[3] + ) + mask_c = self.deformable_groups * self.filter_size[2] * self.filter_size[3] + self.offset_size = [ + self.input_size[0], + offset_c, + self.input_size[2], + self.input_size[3], + ] + self.mask_size = [ + self.input_size[0], + mask_c, + self.input_size[2], + self.input_size[3], + ] + + def init_dilation(self): + self.dilations = [2, 2] + + +class TestWith3x3(TestModulatedDeformableConvOp): + def init_test_case(self): + self.pad = [1, 1] + self.stride = [1, 1] + self.input_size = [2, 3, 5, 5] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [6, f_c, 3, 3] + self.im2col_step = 1 + self.deformable_groups = 1 + offset_c = ( + 2 * self.deformable_groups * self.filter_size[2] * self.filter_size[3] + ) + mask_c = self.deformable_groups * self.filter_size[2] * self.filter_size[3] + self.offset_size = [ + self.input_size[0], + offset_c, + self.input_size[2], + self.input_size[3], + ] + self.mask_size = [ + self.input_size[0], + mask_c, + self.input_size[2], + self.input_size[3], + ] + + +class TestWithGroup(TestModulatedDeformableConvOp): + def init_group(self): + self.groups = 2 + + +class TestWithDouble(TestModulatedDeformableConvOp): + def init_type(self): + self.dtype = np.float64 + + def init_test_case(self): + self.pad = [1, 1] + self.stride = [1, 1] + self.dilations = [1, 1] + self.input_size = [2, 6, 4, 4] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [4, f_c, 3, 3] + self.im2col_step = 1 + self.deformable_groups = 1 + offset_c = ( + 2 * self.deformable_groups * self.filter_size[2] * self.filter_size[3] + ) + mask_c = self.deformable_groups * self.filter_size[2] * self.filter_size[3] + self.offset_size = [ + self.input_size[0], + offset_c, + self.input_size[2], + self.input_size[3], + ] + self.mask_size = [ + self.input_size[0], + mask_c, + self.input_size[2], + self.input_size[3], + ] + + +class TestModulatedDeformableConvInvalidInput(unittest.TestCase): + def test_error_api(self): + def test_invalid_input(): + paddle.enable_static() + input = [1, 3, 32, 32] + offset = paddle.static.data( + name="offset", shape=[None, 3, 32, 32], dtype="float32" + ) + mask = paddle.static.data( + name="mask", shape=[None, 3, 32, 32], dtype="float32" + ) + loss = paddle.vision.ops.DeformConv2D( + in_channels=input[1], out_channels=4, kernel_size=1 + )(input, offset, mask) + + self.assertRaises(TypeError, test_invalid_input) + + def test_invalid_offset(): + paddle.enable_static() + input = paddle.static.data( + name="input", shape=[None, 3, 32, 32], dtype="int32" + ) + offset = paddle.static.data( + name="offset", shape=[None, 3, 32, 32], dtype="float32" + ) + mask = paddle.static.data( + name="mask", shape=[None, 3, 32, 32], dtype="float32" + ) + loss = paddle.vision.ops.DeformConv2D( + in_channels=input.shape[1], out_channels=4, kernel_size=1 + )(input, offset, mask) + + self.assertRaises(TypeError, test_invalid_offset) + + def test_invalid_groups(): + 
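+            # NOTE: groups=0 is expected to fail with ZeroDivisionError
+            # (asserted below), presumably when in_channels is divided by
+            # groups to derive the per-group filter shape.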
paddle.enable_static() + input = paddle.static.data( + name="input_groups", shape=[1, 1, 1, 1], dtype="float32" + ) + offset = paddle.static.data( + name="offset_groups", shape=[1, 1], dtype="float32" + ) + mask = paddle.static.data(name="mask_groups", shape=[1], dtype="float32") + loss = paddle.vision.ops.DeformConv2D( + in_channels=input.shape[1], + out_channels=1, + kernel_size=1, + padding=1, + groups=0, + )(input, offset, mask) + + self.assertRaises(ZeroDivisionError, test_invalid_groups) + + +class TestDeformConv2DAPI(unittest.TestCase): + def test_api(self): + def test_deform_conv2d_v1(): + paddle.enable_static() + input = paddle.static.data( + name="input_v1", shape=[None, 3, 32, 32], dtype="float32" + ) + offset = paddle.static.data( + name="offset_v1", shape=[None, 4, 32, 32], dtype="float32" + ) + out = paddle.vision.ops.DeformConv2D( + in_channels=input.shape[1], out_channels=4, kernel_size=1 + )(input, offset, None) + assert tuple(out.shape) == (-1, 4, 32, 32) + + test_deform_conv2d_v1() + + def test_deform_conv2d_v2(): + paddle.enable_static() + input = paddle.static.data( + name="input_v2", shape=[None, 3, 32, 32], dtype="float32" + ) + offset = paddle.static.data( + name="offset_v2", shape=[None, 4, 32, 32], dtype="float32" + ) + mask = paddle.static.data( + name="mask_v2", shape=[None, 2, 32, 32], dtype="float32" + ) + out = paddle.vision.ops.DeformConv2D( + in_channels=input.shape[1], out_channels=4, kernel_size=1 + )(input, offset, mask) + + assert tuple(out.shape) == (-1, 4, 32, 32) + + test_deform_conv2d_v2() + + +class TestModulatedDeformableConvOp_ZeroSize(TestModulatedDeformableConvOp): + def init_test_case(self): + self.pad = [1, 1] + self.stride = [1, 1] + self.dilations = [1, 1] + # 0-size + self.input_size = [0, 8, 4, 4] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [4, f_c, 3, 3] + self.im2col_step = 1 + self.deformable_groups = 1 + offset_c = ( + 2 * self.deformable_groups * self.filter_size[2] * self.filter_size[3] + ) + mask_c = self.deformable_groups * self.filter_size[2] * self.filter_size[3] + self.offset_size = [ + self.input_size[0], + offset_c, + self.input_size[2], + self.input_size[3], + ] + self.mask_size = [ + self.input_size[0], + mask_c, + self.input_size[2], + self.input_size[3], + ] + + +class TestDeformConv2DAPI_CPU_FP16(unittest.TestCase): + def setUp(self): + self.padding = [1, 1] + self.stride = [1, 1] + self.dilation = [1, 1] + self.groups = 1 + self.data_format = "NCL" + + def test_cpu_fp16(self): + with paddle.base.dygraph.guard(paddle.CPUPlace()): + x = paddle.ones([4, 5, 5, 5]) + offset = paddle.ones([4, 90, 5, 5]).astype(paddle.float16) + weight = paddle.ones([5, 5, 3, 3]).astype(paddle.float16) + bias = paddle.ones([5]).astype(paddle.float16) + mask = paddle.ones([4, 45, 5, 5]).astype(paddle.float16) + + # If there is an error, an error will be thrown. 
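+            # NOTE: with a 3x3 kernel and deformable_groups=5, the offset
+            # needs 2 * 3 * 3 * 5 = 90 channels and the mask needs
+            # 3 * 3 * 5 = 45 channels, matching the tensors built above.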
+ out = paddle.vision.ops.deform_conv2d( + x, + offset, + weight, + bias, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + groups=self.groups, + deformable_groups=5, + mask=mask, + ) + np.testing.assert_allclose(out.shape, [4, 5, 5, 5]) + + +if __name__ == "__main__": + unittest.main() diff --git a/backends/metax_gpu/tests/unit_test/test_deformable_conv_v1_op_metax.py b/backends/metax_gpu/tests/unit_test/test_deformable_conv_v1_op_metax.py new file mode 100644 index 00000000000..6a4244db267 --- /dev/null +++ b/backends/metax_gpu/tests/unit_test/test_deformable_conv_v1_op_metax.py @@ -0,0 +1,319 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +from itertools import product + +import numpy as np +from op_test import OpTest + +import paddle + +from paddle.base import core + +core.set_cublas_switch(False) + + +def dmc_bilinear(data_im, height, width, h, w): + h_low = int(np.floor(h)) + w_low = int(np.floor(w)) + h_high = h_low + 1 + w_high = w_low + 1 + + lh = h - h_low + lw = w - w_low + hh = 1 - lh + hw = 1 - lw + + v1 = 0 + if h_low >= 0 and w_low >= 0: + v1 = data_im[h_low, w_low] + v2 = 0 + if h_low >= 0 and w_high <= width - 1: + v2 = data_im[h_low, w_high] + v3 = 0 + if h_high <= height - 1 and w_low >= 0: + v3 = data_im[h_high, w_low] + v4 = 0 + if h_high <= height - 1 and w_high <= width - 1: + v4 = data_im[h_high, w_high] + + w1, w2, w3, w4 = hh * hw, hh * lw, lh * hw, lh * lw + val = w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4 + + return val + + +def dconv_im2col_gemm(input, offset, filter, group, conv_param): + in_n, in_c, in_h, in_w = input.shape + out_c, f_c, f_h, f_w = filter.shape + + assert offset.shape == (in_n, 2 * f_h * f_w, in_h, in_w) + assert f_c * group == in_c + assert np.mod(out_c, group) == 0 + + stride, pad, dilation = ( + conv_param["stride"], + conv_param["pad"], + conv_param["dilation"], + ) + out_h = 1 + (in_h + 2 * pad[0] - (dilation[0] * (f_h - 1) + 1)) // stride[0] + out_w = 1 + (in_w + 2 * pad[1] - (dilation[1] * (f_w - 1) + 1)) // stride[1] + assert out_h == in_h + assert out_w == in_w + + col_buffer = np.zeros((in_n, in_c * f_h * f_w, in_h * in_w)) + for n, c, h, w, kh, kw in product( + range(in_n), + range(in_c), + range(out_h), + range(out_w), + range(f_h), + range(f_w), + ): + offset_h_table = offset[n, ::2, h, w].reshape(f_h, f_w) + offset_w_table = offset[n, 1::2, h, w].reshape(f_h, f_w) + offset_h = offset_h_table[kh, kw] + offset_w = offset_w_table[kh, kw] + val = 0 + im_h = h * stride[0] + kh * dilation[0] + offset_h - pad[0] + im_w = w * stride[0] + kw * dilation[0] + offset_w - pad[1] + if im_h > -1 and im_w > -1 and im_h < in_h and im_w < in_h: + val = dmc_bilinear(input[n, c], in_h, in_w, im_h, im_w) + val_out = val + + col_buffer[n, c * f_h * f_w + kh * f_w + kw, h * in_w + w] = val_out + + out = np.zeros((in_n, group, int(out_c // group), out_h * out_w)) + weight = filter.reshape(group, int(out_c // group), f_c * f_h * f_w) + col_buffer = 
col_buffer.reshape( + (in_n, group, int(in_c // group * f_h * f_w), in_h * in_w) + ) + for n in range(in_n): + for g in range(group): + out[n, g] = np.matmul(weight[g], col_buffer[n, g]) + out = out.reshape(in_n, out_c, out_h, out_w) + return out + + +def deform_conv2d_wrapper( + x, + offset, + weight, + mask=None, + stride=1, + padding=0, + dilation=1, + deformable_groups=1, + groups=1, + im2col_step=1, +): + return paddle.vision.ops.deform_conv2d( + x, + offset, + weight, + None, + stride, + padding, + dilation, + deformable_groups, + groups, + mask, + ) + + +class TestModulatedDeformableConvOp(OpTest): + def setUp(self): + self.python_api = deform_conv2d_wrapper + self.op_type = "deformable_conv_v1" + self.init_type() + self.init_group() + self.init_dilation() + self.init_test_case() + + conv_param = { + "stride": self.stride, + "pad": self.pad, + "dilation": self.dilations, + } + + input = np.random.random(self.input_size).astype(self.dtype) + offset = 10 * np.random.random(self.offset_size).astype(self.dtype) + filter = np.random.random(self.filter_size).astype(self.dtype) + + output = dconv_im2col_gemm(input, offset, filter, self.groups, conv_param) + output = output.astype(self.dtype) + self.inputs = { + "Input": OpTest.np_dtype_to_base_dtype(input), + "Offset": OpTest.np_dtype_to_base_dtype(offset), + "Filter": OpTest.np_dtype_to_base_dtype(filter), + } + self.attrs = { + "strides": self.stride, + "paddings": self.pad, + "groups": self.groups, + "deformable_groups": self.deformable_groups, + "im2col_step": self.im2col_step, + "dilations": self.dilations, + } + self.outputs = {"Output": output} + + def test_check_output(self): + self.check_output(check_pir=True) + + def test_check_grad(self): + self.check_grad( + ["Input", "Offset", "Filter"], + "Output", + max_relative_error=0.05, + check_pir=True, + ) + + def test_check_grad_no_filter(self): + self.check_grad( + ["Input", "Offset"], + "Output", + max_relative_error=0.1, + no_grad_set={"Filter"}, + check_pir=True, + ) + + def init_test_case(self): + self.pad = [1, 1] + self.stride = [1, 1] + self.dilations = [1, 1] + self.input_size = [2, 4, 4, 4] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [4, f_c, 3, 3] + self.im2col_step = 1 + self.deformable_groups = 1 + offset_c = ( + 2 * self.deformable_groups * self.filter_size[2] * self.filter_size[3] + ) + self.offset_size = [ + self.input_size[0], + offset_c, + self.input_size[2], + self.input_size[3], + ] + + def init_dilation(self): + self.dilations = [1, 1] + + def init_group(self): + self.groups = 1 + + def init_type(self): + self.dtype = np.float32 + + +class TestWithStride(TestModulatedDeformableConvOp): + def init_test_case(self): + self.pad = [3, 3] + self.stride = [2, 2] + self.input_size = [2, 3, 5, 5] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [6, f_c, 3, 3] + self.im2col_step = 1 + self.deformable_groups = 1 + offset_c = ( + 2 * self.deformable_groups * self.filter_size[2] * self.filter_size[3] + ) + self.offset_size = [ + self.input_size[0], + offset_c, + self.input_size[2], + self.input_size[3], + ] + + +class TestWithDilation(TestModulatedDeformableConvOp): + def init_test_case(self): + self.pad = [2, 2] + self.stride = [1, 1] + self.input_size = [5, 3, 4, 4] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [6, f_c, 3, 3] + self.im2col_step = 
1 + self.deformable_groups = 1 + offset_c = ( + 2 * self.deformable_groups * self.filter_size[2] * self.filter_size[3] + ) + self.offset_size = [ + self.input_size[0], + offset_c, + self.input_size[2], + self.input_size[3], + ] + + def init_dilation(self): + self.dilations = [2, 2] + + +class TestWith1x1(TestModulatedDeformableConvOp): + def init_test_case(self): + self.pad = [0, 0] + self.stride = [1, 1] + self.input_size = [2, 3, 5, 5] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [40, f_c, 1, 1] + self.im2col_step = 1 + self.deformable_groups = 1 + offset_c = ( + 2 * self.deformable_groups * self.filter_size[2] * self.filter_size[3] + ) + self.offset_size = [ + self.input_size[0], + offset_c, + self.input_size[2], + self.input_size[3], + ] + + +class TestWithGroup(TestModulatedDeformableConvOp): + def init_test_case(self): + self.pad = [1, 1] + self.stride = [1, 1] + self.dilations = [1, 1] + self.input_size = [2, 8, 4, 4] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [4, f_c, 3, 3] + self.im2col_step = 1 + self.deformable_groups = 1 + offset_c = ( + 2 * self.deformable_groups * self.filter_size[2] * self.filter_size[3] + ) + self.offset_size = [ + self.input_size[0], + offset_c, + self.input_size[2], + self.input_size[3], + ] + + def init_group(self): + self.groups = 2 + + +class TestWithDouble(TestModulatedDeformableConvOp): + def init_type(self): + self.dtype = np.float64 + + +if __name__ == "__main__": + unittest.main() diff --git a/backends/metax_gpu/tests/unit_test/test_einsum_0d_tensor_metax.py b/backends/metax_gpu/tests/unit_test/test_einsum_0d_tensor_metax.py new file mode 100644 index 00000000000..f3f3bb30e34 --- /dev/null +++ b/backends/metax_gpu/tests/unit_test/test_einsum_0d_tensor_metax.py @@ -0,0 +1,201 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
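+
+# NOTE: the cases below exercise paddle.einsum with 0-D (scalar) operands;
+# each one checks the forward value against numpy.einsum and, after
+# mean().backward(), the gradient shapes of every differentiable input.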
+ +import os +import unittest + +import numpy as np +from numpy.testing import assert_allclose + +import paddle + +from paddle.base import core + +core.set_cublas_switch(False) + +os.environ["NVIDIA_TF32_OVERRIDE"] = "0" + + +class Test0DCase0(unittest.TestCase): + def setUp(self): + paddle.disable_static() + + def tearDown(self): + paddle.enable_static() + + def test_func(self): + x = paddle.rand([]) + x.stop_gradient = False + y = paddle.rand([]) + y.stop_gradient = False + z = paddle.einsum("...,...->...", x, y) + assert_allclose( + z.numpy(), + np.einsum("...,...->...", x.numpy(), y.numpy()), + atol=1e-6, + ) + z.mean().backward() + assert z.shape == [] + assert x.grad.shape == [] + assert y.grad.shape == [] + + +class Test0DCase1(Test0DCase0): + def test_func(self): + x = paddle.rand([]) + x.stop_gradient = False + y = paddle.rand([2, 2]) + y.stop_gradient = False + z = paddle.einsum("...,ij->...", x, y) + assert_allclose( + z.numpy(), np.einsum("...,ij->...", x.numpy(), y.numpy()), atol=1e-6 + ) + z.mean().backward() + assert z.shape == [] + assert x.grad.shape == [] + assert y.grad.shape == [2, 2] + + +class Test0DCase2(Test0DCase0): + def test_func(self): + x = paddle.rand([2, 2]) + x.stop_gradient = False + y = paddle.rand([2, 2]) + y.stop_gradient = False + z = paddle.einsum("ij,ij->", x, y) + assert_allclose( + z.numpy(), np.einsum("ij,ij->", x.numpy(), y.numpy()), atol=1e-6 + ) + z.mean().backward() + assert z.shape == [] + assert x.grad.shape == [2, 2] + assert y.grad.shape == [2, 2] + + +class Test0DCase3(Test0DCase0): + def test_func(self): + x = paddle.rand([2, 2]) + x.stop_gradient = True + y = paddle.rand([2, 2]) + y.stop_gradient = False + z = paddle.einsum("ij,ij->", x, y) + assert_allclose( + z.numpy(), np.einsum("ij,ij->", x.numpy(), y.numpy()), atol=1e-6 + ) + z.mean().backward() + assert z.shape == [] + assert x.grad is None + assert y.grad.shape == [2, 2] + + +class Test0DCase4(Test0DCase0): + def test_func(self): + x = paddle.rand([]) + x.stop_gradient = False + z = paddle.einsum("...->...", x) + assert_allclose(z.numpy(), np.einsum("...->...", x.numpy()), atol=1e-6) + z.mean().backward() + assert z.shape == [] + assert x.grad.shape == [] + assert x.grad.numpy() == 1.0 + + +class Test0DCase5(Test0DCase0): + def test_func(self): + x = paddle.rand([2, 2]) + x.stop_gradient = False + y = paddle.rand([2, 2]) + y.stop_gradient = False + z = paddle.einsum("i...j, i...j->...", x, y) + assert_allclose( + z.numpy(), + np.einsum("i...j, i...j->...", x.numpy(), y.numpy()), + atol=1e-6, + ) + z.mean().backward() + assert z.shape == [] + assert x.grad.shape == [2, 2] + assert y.grad.shape == [2, 2] + + +class Test0DCase6(Test0DCase0): + def test_func(self): + x = paddle.rand([2, 2]) + x.stop_gradient = False + z = paddle.einsum("ij->", x) + assert_allclose(z.numpy(), np.einsum("ij->", x.numpy()), atol=1e-6) + z.mean().backward() + assert z.shape == [] + assert x.grad.shape == [2, 2] + + +class Test0DCase7(Test0DCase0): + def test_func(self): + """ + 3 operands. 
+ """ + x = paddle.rand([2, 2]) + y = paddle.rand([]) + z = paddle.rand([]) + x.stop_gradient = False + y.stop_gradient = False + z.stop_gradient = False + o = paddle.einsum("ij...,...,...->...", x, y, z) + assert_allclose( + o.numpy(), + np.einsum("ij...,...,...->...", x.numpy(), y.numpy(), z.numpy()), + atol=1e-6, + ) + o.mean().backward() + assert o.shape == [] + assert x.grad.shape == [2, 2] + assert y.grad.shape == [] + assert z.grad.shape == [] + + +class Test0DCase8(Test0DCase0): + def test_func(self): + """ + 3 operands. + """ + x = paddle.rand([2, 2]) + y = paddle.rand([]) + z = paddle.rand([]) + e = paddle.rand([3, 1]) + x.stop_gradient = False + y.stop_gradient = False + z.stop_gradient = False + e.stop_gradient = False + o = paddle.einsum("ij...,...,..., km->...", x, y, z, e) + assert_allclose( + o.numpy(), + np.einsum( + "ij...,...,...,km->...", + x.numpy(), + y.numpy(), + z.numpy(), + e.numpy(), + ), + atol=1e-6, + ) + o.mean().backward() + assert o.shape == [] + assert x.grad.shape == [2, 2] + assert y.grad.shape == [] + assert z.grad.shape == [] + assert e.grad.shape == [3, 1] + + +if __name__ == "__main__": + unittest.main() diff --git a/backends/metax_gpu/tests/unit_test/test_fc_op_metax.py b/backends/metax_gpu/tests/unit_test/test_fc_op_metax.py new file mode 100644 index 00000000000..67afd71c5f9 --- /dev/null +++ b/backends/metax_gpu/tests/unit_test/test_fc_op_metax.py @@ -0,0 +1,138 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np +from op_test import OpTest + +from paddle.base import core + +core.set_cublas_switch(False) + +SEED = 2020 + + +def fc_refer(matrix, with_bias, with_relu=False): + in_n, in_c, in_h, in_w = matrix.input.shape + w_i, w_o = matrix.weights.shape + + x_data = np.reshape(matrix.input, [in_n, in_c * in_h * in_w]) + w_data = np.reshape(matrix.weights, [w_i, w_o]) + b_data = np.reshape(matrix.bias, [1, w_o]) + result = None + + if with_bias: + result = np.dot(x_data, w_data) + b_data + else: + result = np.dot(x_data, w_data) + + if with_relu: + return np.maximum(result, 0) + else: + return result + + +class MatrixGenerate: + def __init__(self, mb, ic, oc, h, w, bias_dims=2): + self.input = np.random.random((mb, ic, h, w)).astype("float32") + self.weights = np.random.random((ic * h * w, oc)).astype("float32") + if bias_dims == 2: + self.bias = np.random.random((1, oc)).astype("float32") + else: + self.bias = np.random.random(oc).astype("float32") + + +class TestFCOp(OpTest): + def config(self): + self.with_bias = True + self.with_relu = True + self.matrix = MatrixGenerate(1, 10, 15, 3, 3, 2) + + def setUp(self): + self.op_type = "fc" + self.config() + + if self.with_bias: + self.inputs = { + "Input": self.matrix.input, + "W": self.matrix.weights, + "Bias": self.matrix.bias, + } + else: + self.inputs = {"Input": self.matrix.input, "W": self.matrix.weights} + + if self.with_relu: + activation_type = "relu" + else: + activation_type = "" + self.attrs = {"use_onednn": False, "activation_type": activation_type} + + self.outputs = {"Out": fc_refer(self.matrix, self.with_bias, self.with_relu)} + + def test_check_output(self): + self.check_output(check_dygraph=False) + + +class TestFCOpNoBias1(TestFCOp): + def config(self): + self.with_bias = False + self.with_relu = False + self.matrix = MatrixGenerate(2, 8, 10, 1, 1, 2) + + +class TestFCOpNoBias2(TestFCOp): + def config(self): + self.with_bias = False + self.with_relu = False + self.matrix = MatrixGenerate(4, 5, 6, 2, 2, 1) + + +class TestFCOpNoBias4(TestFCOp): + def config(self): + self.with_bias = False + self.with_relu = False + self.matrix = MatrixGenerate(1, 32, 64, 3, 3, 1) + + +class TestFCOpWithBias1(TestFCOp): + def config(self): + self.with_bias = True + self.with_relu = False + self.matrix = MatrixGenerate(3, 8, 10, 2, 1, 2) + + +class TestFCOpWithBias2(TestFCOp): + def config(self): + self.with_bias = True + self.with_relu = True + self.matrix = MatrixGenerate(4, 5, 6, 2, 2, 1) + + +class TestFCOpWithBias3(TestFCOp): + def config(self): + self.with_bias = True + self.with_relu = True + self.matrix = MatrixGenerate(1, 64, 32, 3, 3, 1) + + +class TestFCOpWithPadding(TestFCOp): + def config(self): + self.with_bias = True + self.with_relu = True + self.matrix = MatrixGenerate(1, 4, 3, 128, 128, 2) + + +if __name__ == "__main__": + unittest.main() diff --git a/backends/metax_gpu/tests/unit_test/test_fused_conv2d_add_act_op_metax.py b/backends/metax_gpu/tests/unit_test/test_fused_conv2d_add_act_op_metax.py new file mode 100644 index 00000000000..2b405a76367 --- /dev/null +++ b/backends/metax_gpu/tests/unit_test/test_fused_conv2d_add_act_op_metax.py @@ -0,0 +1,429 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np +from op_test import OpTest, get_device_place, is_custom_device +from test_conv2d_op import conv2d_forward_naive + +from paddle.base import core + +core.set_cudnn_switch(False) + + +def create_test_padding_SAME_class(parent): + class TestPaddingSAMECase(parent): + def init_paddings(self): + self.pad = [0, 0] + self.padding_algorithm = "SAME" + + cls_name = "{}_{}".format(parent.__name__, "PaddingSAMEOp") + TestPaddingSAMECase.__name__ = cls_name + globals()[cls_name] = TestPaddingSAMECase + + +def create_test_padding_VALID_class(parent): + class TestPaddingVALIDCase(parent): + def init_paddings(self): + self.pad = [1, 1] + self.padding_algorithm = "VALID" + + cls_name = "{}_{}".format(parent.__name__, "PaddingVALIDOp") + TestPaddingVALIDCase.__name__ = cls_name + globals()[cls_name] = TestPaddingVALIDCase + + +def create_test_cudnn_channel_last_class(parent): + @unittest.skipIf( + not (core.is_compiled_with_cuda() or is_custom_device()), + "core is not compiled with CUDA", + ) + class TestCudnnChannelLastCase(parent): + def init_test_case(self): + super().init_test_case() + self.data_format = "NHWC" + N, C, H, W = self.input_size + self.input_size = [N, H, W, C] + K1, K2, R, S = self.filter_size + self.filter_size = [K1, R, S, K2] + + def test_check_output(self): + print(self.attrs) + if self.has_cuda(): + place = get_device_place() + self.check_output_with_place(place, atol=1e-5, check_dygraph=False) + + cls_name = "{}_{}".format(parent.__name__, "CudnnChannelLast") + TestCudnnChannelLastCase.__name__ = cls_name + globals()[cls_name] = TestCudnnChannelLastCase + + +class TestFusedConv2dAddActOp(OpTest): + def setUp(self): + self.op_type = "fused_conv2d_add_act" + self.exhaustive_search = False + self.data_format = "NCHW" + self.dtype = np.float32 + self.activation = "relu" + self.add_residual_data = True + self.split_channels = None + self.outputs = None + self.padding_algorithm = "EXIPLICIT" + + self.init_group() + self.init_dilation() + self.init_test_case() + self.init_residual() + self.init_activation() + self.init_paddings() + self.set_search_method() + + conv2d_param = { + "stride": self.stride, + "pad": self.pad, + "dilation": self.dilations, + } + + input = np.random.random(self.input_size).astype(self.dtype) + filter = np.random.random(self.filter_size).astype(self.dtype) + bias = np.random.random(self.filter_size[0]).astype(self.dtype) + + if self.data_format == "NHWC": + filter_nchw = np.transpose(filter, [0, 3, 1, 2]) + else: + filter_nchw = filter + + self.output, _, _, _, _ = conv2d_forward_naive( + input, + filter_nchw, + self.groups, + conv2d_param, + self.padding_algorithm, + self.data_format, + ) + + self.output = self.output.astype(self.dtype) + + self.inputs = { + "Input": OpTest.np_dtype_to_base_dtype(input), + "Filter": OpTest.np_dtype_to_base_dtype(filter), + "Bias": OpTest.np_dtype_to_base_dtype(bias), + } + + if self.add_residual_data: + residual_data = np.random.random(self.output.shape).astype(self.dtype) + self.inputs["ResidualData"] = OpTest.np_dtype_to_base_dtype(residual_data) + self.output += residual_data + 
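+
+            # NOTE: the reference output is assembled as naive conv2d ->
+            # optional residual add -> bias add -> activation, i.e. the chain
+            # that fused_conv2d_add_act is expected to compute in one op.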
+ # Add bias + if self.data_format == "NCHW": + self.output = self.output + bias.reshape((1, bias.size, 1, 1)) + else: + self.output = self.output + bias.reshape((1, 1, 1, bias.size)) + + assert self.activation in ["relu", "identity"] + if self.activation == "relu": + self.output = np.maximum(self.output, 0) + + self.attrs = { + "strides": self.stride, + "paddings": self.pad, + "groups": self.groups, + "dilations": self.dilations, + "data_format": self.data_format, + "exhaustive_search": self.exhaustive_search, + "activation": self.activation, + "padding_algorithm": self.padding_algorithm, + } + if self.split_channels is not None: + self.attrs["split_channels"] = self.split_channels + + self.outputs = {"Output": self.output} + + self.set_outputs() + + def has_cuda(self): + return core.is_compiled_with_cuda() or is_custom_device() + + def test_check_output(self): + if self.has_cuda(): + place = get_device_place() + self.check_output_with_place(place, atol=1e-5, check_dygraph=False) + + def init_test_case(self): + self.pad = [0, 0] + self.stride = [1, 1] + self.input_size = [2, 3, 5, 5] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [6, f_c, 3, 3] + + def init_dilation(self): + self.dilations = [1, 1] + + def init_group(self): + self.groups = 1 + + def init_residual(self): + self.add_residual_data = True + + def init_activation(self): + self.activation = "relu" + + def set_search_method(self): + self.exhaustive_search = False + + def set_outputs(self): + pass + + def init_paddings(self): + self.pad = [0, 0] + self.padding_algorithm = "EXPLICIT" + + +class TestWithoutResidual(TestFusedConv2dAddActOp): + def init_residual(self): + self.add_residual_data = False + + +class TestIdentityActivation(TestFusedConv2dAddActOp): + def init_activation(self): + self.activation = "identity" + + +class TestIdentityActivation1(TestFusedConv2dAddActOp): + def init_activation(self): + self.activation = "identity" + self.add_residual_data = False + + +class TestWithGroup(TestFusedConv2dAddActOp): + def init_group(self): + self.groups = 3 + + +class TestWithDilation(TestFusedConv2dAddActOp): + def init_test_case(self): + self.pad = [0, 0] + self.stride = [1, 1] + self.input_size = [2, 3, 10, 10] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [6, f_c, 3, 3] + + def init_dilation(self): + self.dilations = [2, 2] + + def init_group(self): + self.groups = 3 + + +class TestCUDNNExhaustiveSearch(TestFusedConv2dAddActOp): + def set_search_method(self): + self.exhaustive_search = True + + +class TestMultipleOutputs(TestFusedConv2dAddActOp): + def init_test_case(self): + self.pad = [1, 1] + self.stride = [1, 1] + self.input_size = [1, 32, 17, 17] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [126, f_c, 3, 3] + self.split_channels = [84, 42] + + def set_outputs(self): + out1 = self.output[:, 0:84, :, :] + out2 = self.output[:, 84:126, :, :] + self.outputs["Outputs"] = [("out1", out1), ("out2", out2)] + + +class TestAsyPadding(TestFusedConv2dAddActOp): + def init_paddings(self): + self.pad = [0, 0, 1, 2] + self.padding_algorithm = "EXPLICIT" + + +class TestWithPad_AsyPadding(TestFusedConv2dAddActOp): + def init_test_case(self): + self.stride = [1, 1] + self.input_size = [2, 3, 10, 10] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + 
self.filter_size = [6, f_c, 3, 3] + + def init_paddings(self): + self.pad = [2, 1, 3, 2] + self.padding_algorithm = "EXPLICIT" + + +class TestWithStride_AsyPadding(TestFusedConv2dAddActOp): + def init_test_case(self): + self.stride = [2, 2] + self.input_size = [2, 3, 6, 6] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [6, f_c, 3, 3] + + def init_paddings(self): + self.pad = [2, 1, 3, 2] + self.padding_algorithm = "EXPLICIT" + + +class TestWith1x1_AsyPadding(TestFusedConv2dAddActOp): + def init_test_case(self): + self.stride = [1, 1] + self.input_size = [2, 3, 5, 5] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [6, f_c, 1, 1] + + def init_group(self): + self.groups = 3 + + def init_paddings(self): + self.pad = [2, 2, 4, 0] + self.padding_algorithm = "EXPLICIT" + + +class TestWithGroup_AsyPadding(TestFusedConv2dAddActOp): + def init_group(self): + self.groups = 3 + + +class TestWithDepthWise3x3_AsyPadding(TestFusedConv2dAddActOp): + def init_test_case(self): + self.stride = [1, 1] + self.input_size = [3, 4, 10, 10] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [8, f_c, 3, 3] + + def init_dilation(self): + self.dilations = [2, 2] + + def init_group(self): + self.groups = 4 + + def init_paddings(self): + self.pad = [1, 3, 2, 1] + self.padding_algorithm = "EXPLICIT" + + +class TestWithDepthWise5x5_AsyPadding(TestFusedConv2dAddActOp): + def init_test_case(self): + self.stride = [1, 1] + self.input_size = [2, 4, 10, 10] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [8, f_c, 5, 5] + + def init_group(self): + self.groups = 4 + + def init_paddings(self): + self.pad = [0, 1, 1, 0] + self.padding_algorithm = "EXPLICIT" + + +class TestWithDepthWise7x7_AsyPadding(TestFusedConv2dAddActOp): + def init_test_case(self): + self.stride = [2, 2] + self.input_size = [2, 8, 10, 10] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [16, f_c, 7, 7] + + def init_group(self): + self.groups = 8 + + def init_paddings(self): + self.pad = [1, 3, 4, 1] + self.padding_algorithm = "EXPLICIT" + + +class TestWithDilation_AsyPadding(TestFusedConv2dAddActOp): + def init_test_case(self): + self.stride = [1, 1] + self.input_size = [2, 3, 10, 10] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [6, f_c, 3, 3] + + def init_dilation(self): + self.dilations = [2, 2] + + def init_group(self): + self.groups = 3 + + def init_paddings(self): + self.pad = [0, 1, 3, 0] + self.padding_algorithm = "EXPLICIT" + + +class TestWithInput1x1Filter1x1_AsyPadding(TestFusedConv2dAddActOp): + def init_test_case(self): + self.stride = [1, 1] + self.input_size = [2, 3, 1, 1] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [6, f_c, 1, 1] + + def init_group(self): + self.groups = 3 + + def init_paddings(self): + self.pad = [0, 3, 4, 0] + self.padding_algorithm = "EXPLICIT" + + +class TestSimpleNHWC(TestFusedConv2dAddActOp): + def init_test_case(self): + self.stride = [1, 1] + self.input_size = [3, 5, 5, 2] # NHWC + self.data_format = "NHWC" + assert np.mod(self.input_size[3], self.groups) == 0 + f_c = self.input_size[3] // 
self.groups + self.filter_size = [4, 3, 3, f_c] + + def init_group(self): + self.groups = 1 + + def init_paddings(self): + self.pad = [1, 1] + self.padding_algorithm = "EXPLICIT" + + +create_test_padding_SAME_class(TestAsyPadding) +create_test_padding_SAME_class(TestWithPad_AsyPadding) +create_test_padding_SAME_class(TestWithStride_AsyPadding) +create_test_padding_SAME_class(TestWithGroup_AsyPadding) +create_test_padding_SAME_class(TestWithInput1x1Filter1x1_AsyPadding) + +create_test_padding_VALID_class(TestAsyPadding) +create_test_padding_VALID_class(TestWithPad_AsyPadding) +create_test_padding_VALID_class(TestWithStride_AsyPadding) +create_test_padding_VALID_class(TestWithGroup_AsyPadding) +create_test_padding_VALID_class(TestWithInput1x1Filter1x1_AsyPadding) + +create_test_cudnn_channel_last_class(TestAsyPadding) +create_test_cudnn_channel_last_class(TestWithPad_AsyPadding) +create_test_cudnn_channel_last_class(TestWithStride_AsyPadding) +create_test_cudnn_channel_last_class(TestWithGroup_AsyPadding) +create_test_cudnn_channel_last_class(TestWithInput1x1Filter1x1_AsyPadding) + +if __name__ == "__main__": + unittest.main() diff --git a/backends/metax_gpu/tests/unit_test/test_imperative_double_grad_metax.py b/backends/metax_gpu/tests/unit_test/test_imperative_double_grad_metax.py new file mode 100644 index 00000000000..803b00cc6b4 --- /dev/null +++ b/backends/metax_gpu/tests/unit_test/test_imperative_double_grad_metax.py @@ -0,0 +1,1106 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
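+
+# NOTE: this file covers second-order (double) gradients in dygraph mode; the
+# set_cudnn_switch(False) / set_cublas_switch(False) calls below presumably
+# pin the metax backend to non-cuDNN/cuBLAS code paths for these checks.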
+import unittest +from unittest import TestCase + +import numpy as np +from op_test import get_device, is_custom_device + +import paddle +import paddle.nn.functional as F +from paddle import base +from paddle.base.wrapped_decorator import wrap_decorator +from paddle.vision.models import resnet50, resnet101 + +from paddle.base import core + +core.set_cudnn_switch(False) + +core.set_cublas_switch(False) + + +def _dygraph_guard_(func): + def __impl__(*args, **kwargs): + if base.in_dygraph_mode(): + return func(*args, **kwargs) + else: + with base.dygraph.guard(): + return func(*args, **kwargs) + + return __impl__ + + +dygraph_guard = wrap_decorator(_dygraph_guard_) + + +def random_var(size, low=-1, high=1, dtype="float32"): + x_np = np.random.uniform(low=low, high=high, size=size).astype(dtype) + return paddle.to_tensor(x_np) + + +class TestEagerGrad(TestCase): + def test_simple_example_eager_grad(self): + np.random.seed(2021) + paddle.set_device("cpu") + np_x = np.random.random((3, 3)) + np_y = np.random.random((3, 1)) + x = paddle.to_tensor(np_x, dtype="float64", stop_gradient=False) + y = paddle.to_tensor(np_y, dtype="float64", stop_gradient=False) + out = paddle.matmul(x, y) + dx = base.dygraph.grad(out, x) + + dout = np.ones_like(np_y) + expected_dx = np.matmul(dout, np.transpose(np_y)) + + # stop_gradient = !create_graph, create_graph default false + self.assertEqual(dx[0].stop_gradient, True) + np.testing.assert_allclose(dx[0].numpy(), expected_dx, rtol=1e-05) + + def test_simple_example_eager_grad_allow_unused(self): + np.random.seed(2021) + paddle.set_device("cpu") + np_x = np.random.random((3, 3)) + np_y = np.random.random((3, 1)) + np_z = np.random.random((3, 1)) + x = paddle.to_tensor(np_x, dtype="float64", stop_gradient=False) + y = paddle.to_tensor(np_y, dtype="float64", stop_gradient=False) + z = paddle.to_tensor(np_z, dtype="float64", stop_gradient=False) + out_z = paddle.nn.functional.sigmoid(z) + out = paddle.matmul(x, y) + + dx = base.dygraph.grad(out, [x, z], allow_unused=True) + dout = np.ones_like(np_y) + expected_dx = np.matmul(dout, np.transpose(np_y)) + np.testing.assert_allclose(dx[0].numpy(), expected_dx, rtol=1e-05) + # stop_gradient = !create_graph, create_graph default false + self.assertEqual(dx[0].stop_gradient, True) + # x is unused input in the graph + self.assertIsNone(dx[1]) + + def test_simple_example_eager_grad_not_allow_unused(self): + np.random.seed(2021) + paddle.set_device("cpu") + np_x = np.random.random((3, 3)) + np_y = np.random.random((3, 1)) + np_z = np.random.random((3, 1)) + x = paddle.to_tensor(np_x, dtype="float64", stop_gradient=False) + y = paddle.to_tensor(np_y, dtype="float64", stop_gradient=False) + z = paddle.to_tensor(np_z, dtype="float64", stop_gradient=False) + out_z = paddle.nn.functional.sigmoid(z) + out = paddle.matmul(x, y) + + try: + # allow_unused is false in default + dx = base.dygraph.grad(out, [x, z]) + except ValueError as e: + error_msg = str(e) + assert error_msg.find("allow_unused") > 0 + + def test_simple_example_eager_grad_duplicate_input(self): + np.random.seed(2021) + paddle.set_device("cpu") + np_x = np.random.random((3, 3)) + np_y = np.random.random((3, 1)) + np_z = np.random.random((3, 1)) + x = paddle.to_tensor(np_x, dtype="float64", stop_gradient=False) + y = paddle.to_tensor(np_y, dtype="float64", stop_gradient=False) + z = paddle.to_tensor(np_z, dtype="float64", stop_gradient=False) + out_z = paddle.nn.functional.sigmoid(z) + out = paddle.matmul(x, y) + + try: + # duplicate input will arise RuntimeError errors 
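+            # NOTE: i.e. passing the same tensor twice in `inputs` should
+            # raise a RuntimeError, which the except branch below asserts.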
+ dx = base.dygraph.grad(out, [x, x]) + except RuntimeError as e: + error_msg = str(e) + assert error_msg.find("duplicate") > 0 + + def test_simple_example_eager_grad_duplicate_output(self): + np.random.seed(2021) + paddle.set_device("cpu") + np_x = np.random.random((3, 3)) + np_y = np.random.random((3, 1)) + np_z = np.random.random((3, 1)) + x = paddle.to_tensor(np_x, dtype="float64", stop_gradient=False) + y = paddle.to_tensor(np_y, dtype="float64", stop_gradient=False) + z = paddle.to_tensor(np_z, dtype="float64", stop_gradient=False) + out_z = paddle.nn.functional.sigmoid(z) + out = paddle.matmul(x, y) + + try: + # duplicate output will arise RuntimeError errors + dx = base.dygraph.grad([out, out], [x]) + except RuntimeError as e: + error_msg = str(e) + assert error_msg.find("duplicate") > 0 + + def test_simple_example_eager_two_grad_output(self): + x1 = paddle.to_tensor([1.0, 2.0]) + x1.stop_gradient = False + x2 = paddle.to_tensor([1.0, 2.0]) + x2.stop_gradient = False + out1 = x1 * 2 + out2 = x2 * 2 + + dout2_record_by_hook = [] + + def record_hook(grad): + dout2_record_by_hook.append(grad) + + out2.register_hook(record_hook) + + out3 = paddle.multiply(out1, out2) + out4 = paddle.mean(out3) + egr_dout2, egr_dout3 = paddle.grad([out4], [out2, out3]) + + np.testing.assert_array_equal( + dout2_record_by_hook[0].numpy(), np.array([1.0, 2.0]) + ) + + x1 = paddle.to_tensor([1.0, 2.0]) + x1.stop_gradient = False + x2 = paddle.to_tensor([1.0, 2.0]) + x2.stop_gradient = False + out1 = x1 * 2 + out2 = x2 * 2 + + out3 = paddle.multiply(out1, out2) + out4 = paddle.mean(out3) + dout2, dout3 = paddle.grad([out4], [out2, out3]) + + self.assertEqual(dout2.stop_gradient, egr_dout2.stop_gradient) + self.assertEqual(dout3.stop_gradient, egr_dout3.stop_gradient) + np.testing.assert_array_equal(dout2.numpy(), egr_dout2.numpy()) + np.testing.assert_array_equal(dout3.numpy(), egr_dout3.numpy()) + + +class TestDygraphDoubleGrad(TestCase): + def setUp(self): + self.sort_sum_gradient = False + self.shape = [5, 10] + + def grad( + self, + outputs, + inputs, + grad_outputs=None, + no_grad_vars=None, + retain_graph=None, + create_graph=False, + allow_unused=False, + ): + base.set_flags({"FLAGS_sort_sum_gradient": self.sort_sum_gradient}) + return base.dygraph.grad( + outputs=outputs, + inputs=inputs, + grad_outputs=grad_outputs, + no_grad_vars=no_grad_vars, + retain_graph=retain_graph, + create_graph=create_graph, + allow_unused=allow_unused, + ) + + @dygraph_guard + def test_exception(self): + with self.assertRaises(AssertionError): + self.grad(None, None) + + shape = self.shape + + with self.assertRaises(AssertionError): + self.grad(1, random_var(shape)) + + with self.assertRaises(AssertionError): + self.grad(random_var(shape), 1) + + with self.assertRaises(AssertionError): + self.grad([1], [random_var(shape)]) + + with self.assertRaises(AssertionError): + self.grad([random_var(shape)], [1]) + + with self.assertRaises(AssertionError): + self.grad( + [random_var(shape), random_var(shape)], + [random_var(shape)], + [random_var(shape)], + ) + + with self.assertRaises(AssertionError): + self.grad([random_var(shape)], [random_var(shape)], no_grad_vars=[1]) + + with self.assertRaises(AssertionError): + self.grad([random_var(shape)], [random_var(shape)], no_grad_vars=1) + + @dygraph_guard + def test_simple_example(self): + x = random_var(self.shape) + x.stop_gradient = False + y = x + 1 + + for create_graph in [False, True]: + (dx,) = self.grad([x], [x], create_graph=create_graph, retain_graph=True) + 
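+                # NOTE: d(x)/d(x) is an all-ones tensor of x's shape; the
+                # grad([y, x], [x]) call further down sums the contributions
+                # from y = x + 1 and from x itself, hence the expected 2.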
self.assertEqual(dx.shape, x.shape) + self.assertTrue(np.all(dx.numpy() == 1)) + self.assertNotEqual(dx.stop_gradient, create_graph) + + (dx_mul_2,) = self.grad( + [y, x], [x], create_graph=create_graph, retain_graph=True + ) + self.assertEqual(dx_mul_2.shape, x.shape) + self.assertTrue(np.all(dx_mul_2.numpy() == 2)) + self.assertNotEqual(dx_mul_2.stop_gradient, create_graph) + + (none_grad,) = self.grad( + [x], [y], create_graph=create_graph, allow_unused=True + ) + self.assertIsNone(none_grad) + + (grad_with_none_and_not_none,) = self.grad( + [x, y], [y], create_graph=create_graph + ) + self.assertTrue(grad_with_none_and_not_none.shape, x.shape) + self.assertTrue(np.all(grad_with_none_and_not_none.numpy() == 1)) + self.assertNotEqual(grad_with_none_and_not_none.stop_gradient, create_graph) + + @dygraph_guard + def test_example_no_grad_vars(self): + x = random_var(self.shape) + x_np = x.numpy() + numel = x_np.size + x.stop_gradient = False + + y1 = F.relu(x) + y2 = F.relu(x) + z = y1 + y2 + w = z * z + + w_mean = paddle.mean(w) + del y1, z, w + + (dx_actual,) = self.grad([w_mean], [x], create_graph=True, no_grad_vars=[y2]) + + self.assertFalse(y2.stop_gradient) + self.assertFalse(dx_actual.stop_gradient) + + dx_expected = ( + 1.0 / float(numel) * (np.maximum(x_np, 0) + y2.numpy()) * (x_np > 0) * 2 + ).astype("float32") + + np.testing.assert_allclose(dx_actual.numpy(), dx_expected, rtol=1e-05) + + @dygraph_guard + def test_none_one_initial_gradient(self): + numel = 1 + for s in self.shape: + numel *= s + + half_numel = int(numel / 2) + half_x_positive = np.random.uniform(low=1, high=2, size=[half_numel]) + half_x_negative = np.random.uniform(low=-2, high=-1, size=[numel - half_numel]) + x_np = np.array(list(half_x_positive) + list(half_x_negative)).astype("float32") + np.random.shuffle(x_np) + + x = paddle.to_tensor(x_np) + x.stop_gradient = False + + alpha = 0.2 + y = paddle.nn.functional.leaky_relu(x, alpha) + y = y * y + z = y * y + + x_np = x.numpy() + relu_x_np = np.maximum(x_np, alpha * x_np).astype("float32") + relu_x_grad_np = ((x_np > 0) + (x_np < 0) * alpha).astype("float32") + dy_expected = (relu_x_np * relu_x_grad_np * 2).astype("float32") + dz_expected = (np.power(relu_x_np, 3) * relu_x_grad_np * 4).astype("float32") + + random_grad_y = random_var(y.shape, low=1, high=2) + random_grad_z = random_var(z.shape, low=1, high=2) + ones_grad_y = np.ones(y.shape).astype("float32") + ones_grad_z = np.ones(z.shape).astype("float32") + + original_random_grad_y = random_grad_y.numpy() + original_random_grad_z = random_grad_z.numpy() + + for grad_y in [random_grad_y]: + for grad_z in [random_grad_z]: + for create_graph in [False, True]: + (dx_actual,) = self.grad( + outputs=[y, z], + inputs=[x], + grad_outputs=[grad_y, grad_z], + create_graph=create_graph, + retain_graph=True, + ) + + grad_y_np = ones_grad_y if grad_y is None else grad_y.numpy() + grad_z_np = ones_grad_z if grad_z is None else grad_z.numpy() + + dx_expected = dy_expected * grad_y_np + dz_expected * grad_z_np + np.testing.assert_allclose( + dx_actual.numpy(), dx_expected, rtol=1e-05 + ) + + if grad_y is not None: + self.assertTrue(grad_y.stop_gradient) + np.testing.assert_array_equal( + grad_y.numpy(), original_random_grad_y + ) + + if grad_z is not None: + self.assertTrue(grad_z.stop_gradient) + np.testing.assert_array_equal( + grad_z.numpy(), original_random_grad_z + ) + + @dygraph_guard + def test_example_with_gradient_accumulation_and_create_graph(self): + x = random_var(self.shape) + x_np = x.numpy() + numel = 
x_np.size + x.stop_gradient = False + + y = F.relu(x) + z = y + 1 + w = z * z + + w_mean = paddle.mean(w) + del y, z, w + + (dx_actual,) = self.grad([w_mean], [x], create_graph=True) + del w_mean + + self.assertFalse(dx_actual.stop_gradient) + + # Theoretical result based on math calculation + dx_expected = ( + 1.0 / float(numel) * (np.maximum(x_np, 0) + 1) * (x_np > 0) * 2 + ).astype("float32") + np.testing.assert_allclose(dx_actual.numpy(), dx_expected, rtol=1e-05) + + loss = paddle.mean(dx_actual * dx_actual + x * x) + loss.backward(retain_graph=True) + + x_grad_actual = x.gradient() + x_grad_expected = ( + 2.0 / float(numel) * (x_np + dx_expected * (x_np > 0) * 2 / float(numel)) + ).astype("float32") + np.testing.assert_allclose(x_grad_actual, x_grad_expected, rtol=1e-05) + + for i in range(5): + loss.backward(retain_graph=True) + x_grad_actual = x.gradient() + x_grad_expected = (i + 2) * ( + 2.0 + / float(numel) + * (x_np + dx_expected * (x_np > 0) * 2 / float(numel)) + ).astype("float32") + np.testing.assert_allclose(x_grad_actual, x_grad_expected, rtol=1e-05) + + @dygraph_guard + def test_example_with_gradient_accumulation_and_no_grad_vars(self): + x = random_var(self.shape) + x_np = x.numpy() + numel = x_np.size + x.stop_gradient = False + + y1 = F.relu(x) + y2 = F.relu(x) + z = y1 + y2 + w = z * z + + w_mean = paddle.mean(w) + del y1, z, w + + (dx_actual,) = self.grad( + [w_mean], + [x], + retain_graph=True, + create_graph=True, + no_grad_vars=[y2], + ) + + self.assertFalse(y2.stop_gradient) + self.assertFalse(dx_actual.stop_gradient) + + dx_expected = ( + 1.0 / float(numel) * (np.maximum(x_np, 0) + y2.numpy()) * (x_np > 0) * 2 + ).astype("float32") + np.testing.assert_allclose(dx_actual.numpy(), dx_expected, rtol=1e-05) + + loss = paddle.mean(dx_actual * dx_actual + x * x) + loss.backward() + + x_grad_actual = x.gradient() + x_grad_expected = ( + 2.0 / float(numel) * (x_np + dx_expected * (x_np > 0) * 4 / float(numel)) + ).astype("float32") + np.testing.assert_allclose(x_grad_actual, x_grad_expected, rtol=1e-05) + + @dygraph_guard + def test_example_with_gradient_accumulation_and_not_create_graph(self): + x = random_var(self.shape) + x_np = x.numpy() + numel = x_np.size + x.stop_gradient = False + + y = F.relu(x) + z = y + 1 + w = z * z + + w_mean = paddle.mean(w) + del y, z, w + + (dx_actual,) = self.grad([w_mean], [x], create_graph=False) + del w_mean + + self.assertTrue(dx_actual.stop_gradient) + + dx_expected = ( + 1.0 / float(numel) * (np.maximum(x_np, 0) + 1) * (x_np > 0) * 2 + ).astype("float32") + + np.testing.assert_allclose(dx_actual.numpy(), dx_expected, rtol=1e-05) + + loss = paddle.mean(dx_actual * dx_actual + x * x) + loss.backward() + + x_grad_actual = x.gradient() + x_grad_expected = (2.0 * x_np / float(numel)).astype("float32") + np.testing.assert_allclose(x_grad_actual, x_grad_expected, rtol=1e-05) + + +class TestDygraphDoubleGradSortGradient(TestDygraphDoubleGrad): + def setUp(self): + self.sort_sum_gradient = True + self.shape = [5, 10] + + +class TestDygraphDoubleGradVisitedUniq(TestCase): + def test_compare(self): + value = np.random.uniform(-0.5, 0.5, 100).reshape(10, 2, 5).astype("float32") + + def model_f(input): + linear = paddle.nn.Linear(5, 3) + for i in range(10): + if i == 0: + out = linear(input) + else: + out = out + linear(input) + return out + + base.set_flags({"FLAGS_sort_sum_gradient": True}) + + with base.dygraph.guard(): + paddle.seed(123) + if paddle.framework.use_pir_api(): + with paddle.pir_utils.OldIrGuard(): + # Note: dygraph use 
self.main_program.global_block().create_parameter(), it's need manual seed to old Program + paddle.framework.random._manual_program_seed(123) + paddle.framework.random._manual_program_seed(123) + else: + paddle.framework.random._manual_program_seed(123) + a = paddle.to_tensor(value) + a.stop_gradient = False + + out = model_f(a) + + dx = base.dygraph.grad( + outputs=[out], + inputs=[a], + create_graph=False, + only_inputs=True, + allow_unused=False, + ) + + grad_1 = dx[0].numpy() + + with base.dygraph.guard(): + paddle.seed(123) + if paddle.framework.use_pir_api(): + with paddle.pir_utils.OldIrGuard(): + # Note: dygraph use self.main_program.global_block().create_parameter(), it's need manual seed to old Program + paddle.framework.random._manual_program_seed(123) + paddle.framework.random._manual_program_seed(123) + else: + paddle.framework.random._manual_program_seed(123) + a = paddle.to_tensor(value) + a.stop_gradient = False + + out = model_f(a) + out.backward() + + grad_2 = a.gradient() + + np.testing.assert_array_equal(grad_1, grad_2) + + +class TestDoubleGradResNet(TestCase): + def setUp(self): + paddle.seed(123) + if paddle.framework.use_pir_api(): + with paddle.pir_utils.OldIrGuard(): + # Note: dygraph use self.main_program.global_block().create_parameter(), it's need manual seed to old Program + paddle.framework.random._manual_program_seed(123) + paddle.framework.random._manual_program_seed(123) + else: + paddle.framework.random._manual_program_seed(123) + self.data = np.random.rand(1, 3, 224, 224).astype(np.float32) + + @dygraph_guard + def test_resnet_resnet50(self): + model = resnet50(pretrained=False) + egr_data = paddle.to_tensor(self.data) + egr_data.stop_gradient = False + egr_out = model(egr_data) + egr_preds = paddle.argmax(egr_out, axis=1) + egr_label_onehot = paddle.nn.functional.one_hot( + paddle.to_tensor(egr_preds), num_classes=egr_out.shape[1] + ) + egr_target = paddle.sum(egr_out * egr_label_onehot, axis=1) + + egr_g = paddle.grad(outputs=egr_target, inputs=egr_out)[0] + egr_g_numpy = egr_g.numpy() + self.assertEqual(list(egr_g_numpy.shape), list(egr_out.shape)) + + model = resnet50(pretrained=False) + data = paddle.to_tensor(self.data) + data.stop_gradient = False + out = model(data) + preds = paddle.argmax(out, axis=1) + label_onehot = paddle.nn.functional.one_hot( + paddle.to_tensor(preds), num_classes=out.shape[1] + ) + target = paddle.sum(out * label_onehot, axis=1) + + g = paddle.grad(outputs=target, inputs=out)[0] + g_numpy = g.numpy() + self.assertEqual(list(g_numpy.shape), list(out.shape)) + + np.testing.assert_array_equal(egr_out, out) + np.testing.assert_array_equal(egr_g_numpy, g_numpy) + + @dygraph_guard + def test_resnet_resnet101(self): + model = resnet101(pretrained=False) + egr_data = paddle.to_tensor(self.data) + egr_data.stop_gradient = False + egr_out = model(egr_data) + egr_preds = paddle.argmax(egr_out, axis=1) + egr_label_onehot = paddle.nn.functional.one_hot( + paddle.to_tensor(egr_preds), num_classes=egr_out.shape[1] + ) + egr_target = paddle.sum(egr_out * egr_label_onehot, axis=1) + + egr_g = paddle.grad(outputs=egr_target, inputs=egr_out)[0] + egr_g_numpy = egr_g.numpy() + self.assertEqual(list(egr_g_numpy.shape), list(egr_out.shape)) + + model = resnet101(pretrained=False) + data = paddle.to_tensor(self.data) + data.stop_gradient = False + out = model(data) + preds = paddle.argmax(out, axis=1) + label_onehot = paddle.nn.functional.one_hot( + paddle.to_tensor(preds), num_classes=out.shape[1] + ) + target = paddle.sum(out * label_onehot, 
axis=1) + + g = paddle.grad(outputs=target, inputs=out)[0] + g_numpy = g.numpy() + self.assertEqual(list(g_numpy.shape), list(out.shape)) + + np.testing.assert_array_equal(egr_out, out) + np.testing.assert_array_equal(egr_g_numpy, g_numpy) + + +class TestDoubleGradBasics(TestCase): + def test_matmul(self): + input_numpy = np.ones([3, 3]) * 2 + x = paddle.to_tensor(input_numpy, stop_gradient=False, dtype="float32") + y = paddle.to_tensor(input_numpy, stop_gradient=False, dtype="float32") + grad_out = paddle.to_tensor( + np.ones([3, 3]), stop_gradient=False, dtype="float32" + ) + + out = paddle.matmul(x, y, False, False) + new_x_g, new_y_g = paddle.grad( + [out], [x, y], [grad_out], retain_graph=True, create_graph=True + ) + new_x_g.backward() + + out_ref = np.ones([3, 3]) * 12.0 + np.testing.assert_array_equal(out.numpy(), out_ref) + + new_x_g_ref = np.ones([3, 3]) * 6.0 + new_y_g_ref = np.ones([3, 3]) * 6.0 + np.testing.assert_array_equal(new_x_g.numpy(), new_x_g_ref) + np.testing.assert_array_equal(new_y_g.numpy(), new_y_g_ref) + + x_grad_ref = np.ones([3, 3]) * 0.0 + np.testing.assert_array_equal(x.grad.numpy(), x_grad_ref) + + y_grad_ref = np.ones([3, 3]) * 3.0 + np.testing.assert_array_equal(y.grad.numpy(), y_grad_ref) + + grad_out_grad_ref = np.ones([3, 3]) * 6.0 + np.testing.assert_array_equal(grad_out.grad.numpy(), grad_out_grad_ref) + + +class TestDygraphDoubleGradMatmul(TestCase): + # case1: ddy is none, no broadcast,dims != 1 + def test_matmul_double_grad_case1(self): + input_numpy_x = np.random.random([3, 3]).astype("float32") + input_numpy_y = np.random.random([3, 3]).astype("float32") + + def actual(): + x = paddle.to_tensor(input_numpy_x, stop_gradient=False, dtype="float32") + y = paddle.to_tensor(input_numpy_y, stop_gradient=False, dtype="float32") + out = paddle.matmul(x, y, False, False) + + dout = paddle.to_tensor( + np.ones([3, 3]), stop_gradient=False, dtype="float32" + ) + (dx, dy) = paddle.grad( + [out], [x, y], [dout], retain_graph=True, create_graph=True + ) + ddx = paddle.to_tensor( + np.ones([3, 3]), stop_gradient=False, dtype="float32" + ) + ddy = ddx + dx_double_grad, dy_double_grad, ddout = paddle.grad( + [dx, dy], + [x, y, dout], + [ddx, ddy], + retain_graph=True, + create_graph=True, + ) + return dx_double_grad, dy_double_grad, ddout + + def expected(): + dx_double_grad_expected = np.matmul( + np.ones([3, 3], dtype="float32"), + np.ones([3, 3], dtype="float32"), + ) + dy_double_grad_expected = np.matmul( + np.ones([3, 3], dtype="float32"), + np.ones([3, 3], dtype="float32"), + ) + ddout_expected1 = np.matmul(np.ones([3, 3], dtype="float32"), input_numpy_y) + ddout_expected2 = np.matmul(input_numpy_x, np.ones([3, 3], dtype="float32")) + ddout_expected = ddout_expected1 + ddout_expected2 + return ( + dx_double_grad_expected, + dy_double_grad_expected, + ddout_expected, + ) + + expected_results = expected() + places = ["cpu"] + if paddle.is_compiled_with_cuda() or is_custom_device(): + places.append(get_device()) + for place in places: + paddle.device.set_device(place) + actual_results = actual() + for expected_result, actual_result in zip(expected_results, actual_results): + np.testing.assert_allclose(expected_result, actual_result, rtol=1e-6) + + # case2: ddx is none,no broadcast, dims != 1 + def test_matmul_double_grad_case2(self): + input_numpy_x = np.random.random([3, 3]).astype("float32") + input_numpy_y = np.random.random([3, 3]).astype("float32") + + def actual(): + x = paddle.to_tensor(input_numpy_x, stop_gradient=False, dtype="float32") + y = 
paddle.to_tensor(input_numpy_y, stop_gradient=False, dtype="float32") + out = paddle.matmul(x, y, False, False) + + dout = paddle.to_tensor( + np.ones([3, 3]), stop_gradient=False, dtype="float32" + ) + (dy,) = paddle.grad( + [out], [y], [dout], retain_graph=True, create_graph=True + ) + ddy = paddle.to_tensor( + np.ones([3, 3]), stop_gradient=False, dtype="float32" + ) + # when x is not differentiated in the first grad, dy in the second grad could be None in the composite op + dx_double_grad, ddout = paddle.grad( + [dy], + [x, dout], + [ddy], + retain_graph=True, + create_graph=True, + ) + return dx_double_grad, ddout + + def expected(): + dx_double_grad_expected = np.matmul( + np.ones([3, 3], dtype="float32"), + np.ones([3, 3], dtype="float32"), + ) + ddout_expected = np.matmul(input_numpy_x, np.ones([3, 3], dtype="float32")) + return ( + dx_double_grad_expected, + ddout_expected, + ) + + expected_results = expected() + places = ["cpu"] + if paddle.is_compiled_with_cuda() or is_custom_device(): + places.append(get_device()) + for place in places: + paddle.device.set_device(place) + actual_results = actual() + for expected_result, actual_result in zip(expected_results, actual_results): + np.testing.assert_allclose(expected_result, actual_result, rtol=1e-6) + + # case3: ddx is none, dims = 1 + def test_matmul_double_grad_case3(self): + input_numpy_x = np.random.random([3]).astype("float32") + input_numpy_y = np.random.random([3]).astype("float32") + + def actual(): + x = paddle.to_tensor(input_numpy_x, stop_gradient=False, dtype="float32") + y = paddle.to_tensor(input_numpy_y, stop_gradient=False, dtype="float32") + out = paddle.matmul(x, y, False, False) + + dout = paddle.to_tensor(np.ones([1]), stop_gradient=False, dtype="float32") + (dy,) = paddle.grad( + [out], [y], [dout], retain_graph=True, create_graph=True + ) + ddy = paddle.to_tensor(np.ones([3]), stop_gradient=False, dtype="float32") + # when x is not differentiated in the first grad, dy from the second grad could be None in the composite API. + dx_double_grad, ddout = paddle.grad( + [dy], + [x, dout], + [ddy], + retain_graph=True, + create_graph=True, + ) + return dx_double_grad, ddout + + def expected(): + dx_double_grad_expected = np.ones([3], dtype="float32") + ddout_expected = np.matmul(input_numpy_x, np.ones([3], dtype="float32")) + return ( + dx_double_grad_expected, + ddout_expected, + ) + + expected_results = expected() + places = ["cpu"] + if paddle.is_compiled_with_cuda() or is_custom_device(): + places.append(get_device()) + for place in places: + paddle.device.set_device(place) + actual_results = actual() + for expected_result, actual_result in zip(expected_results, actual_results): + np.testing.assert_allclose(expected_result, actual_result, rtol=1e-6) + + # case4: ddy is none, dims = 1 + def test_matmul_double_grad_case4(self): + input_numpy_x = np.random.random([3]).astype("float32") + input_numpy_y = np.random.random([3]).astype("float32") + + def actual(): + x = paddle.to_tensor(input_numpy_x, stop_gradient=False, dtype="float32") + y = paddle.to_tensor(input_numpy_y, stop_gradient=False, dtype="float32") + out = paddle.matmul(x, y, False, False) + + dout = paddle.to_tensor(np.ones([1]), stop_gradient=False, dtype="float32") + (dx,) = paddle.grad( + [out], [x], [dout], retain_graph=True, create_graph=True + ) + ddx = paddle.to_tensor(np.ones([3]), stop_gradient=False, dtype="float32") + # when y is not differentiated in the first grad, dx from the second grad could be None in the composite API.
+ dy_double_grad, ddout = paddle.grad( + [dx], + [y, dout], + [ddx], + retain_graph=True, + create_graph=True, + ) + return dy_double_grad, ddout + + def expected(): + dy_double_grad_expected = np.ones([3], dtype="float32") + ddout_expected = np.matmul(input_numpy_y, np.ones([3], dtype="float32")) + return ( + dy_double_grad_expected, + ddout_expected, + ) + + expected_results = expected() + places = ["cpu"] + if paddle.is_compiled_with_cuda() or is_custom_device(): + places.append(get_device()) + for place in places: + paddle.device.set_device(place) + actual_results = actual() + + for expected_result, actual_result in zip(expected_results, actual_results): + np.testing.assert_allclose(expected_result, actual_result, rtol=1e-6) + + # case5: ddx is none, broadcast, dims != 1 + def test_matmul_double_grad_case5(self): + input_numpy_x = np.random.random([2, 1]).astype("float32") + input_numpy_y = np.random.random([1]).astype("float32") + + def actual(): + x = paddle.to_tensor(input_numpy_x, stop_gradient=False, dtype="float32") + y = paddle.to_tensor(input_numpy_y, stop_gradient=False, dtype="float32") + out = paddle.matmul(x, y, False, False) + + dout = paddle.to_tensor(np.ones([2]), stop_gradient=False, dtype="float32") + (dy,) = paddle.grad( + [out], [y], [dout], retain_graph=True, create_graph=True + ) + ddy = paddle.to_tensor(np.ones([1]), stop_gradient=False, dtype="float32") + dx_double_grad, ddout = paddle.grad( + [dy], + [x, dout], + [ddy], + retain_graph=True, + create_graph=True, + ) + return dx_double_grad, ddout + + def expected(): + dx_double_grad_expected = np.ones([2, 1], dtype="float32") + ddout_expected = np.matmul(input_numpy_x, np.ones([1], dtype="float32")) + return ( + dx_double_grad_expected, + ddout_expected, + ) + + expected_results = expected() + places = ["cpu"] + if paddle.is_compiled_with_cuda() or is_custom_device(): + places.append(get_device()) + for place in places: + paddle.device.set_device(place) + actual_results = actual() + for expected_result, actual_result in zip(expected_results, actual_results): + np.testing.assert_allclose(expected_result, actual_result, rtol=1e-6) + + # case6: ddy is none, broadcast, dims != 1 + def test_matmul_double_grad_case6(self): + input_numpy_x = np.random.random([2, 1]).astype("float32") + input_numpy_y = np.random.random([1]).astype("float32") + + def actual(): + x = paddle.to_tensor(input_numpy_x, stop_gradient=False, dtype="float32") + y = paddle.to_tensor(input_numpy_y, stop_gradient=False, dtype="float32") + out = paddle.matmul(x, y, False, False) + + dout = paddle.to_tensor(np.ones([2]), stop_gradient=False, dtype="float32") + (dx,) = paddle.grad( + [out], [x], [dout], retain_graph=True, create_graph=True + ) + ddx = paddle.to_tensor( + np.ones([2, 1]), stop_gradient=False, dtype="float32" + ) + dy_double_grad, ddout = paddle.grad( + [dx], + [y, dout], + [ddx], + retain_graph=True, + create_graph=True, + ) + return dy_double_grad, ddout + + def expected(): + dy_double_grad_expected = np.ones([1], dtype="float32") * 2 + ddout_expected = np.ones([2], dtype="float32") * input_numpy_y[0] + return ( + dy_double_grad_expected, + ddout_expected, + ) + + expected_results = expected() + places = ["cpu"] + if paddle.is_compiled_with_cuda() or is_custom_device(): + places.append(get_device()) + for place in places: + paddle.device.set_device(place) + actual_results = actual() + for expected_result, actual_result in zip(expected_results, actual_results): + np.testing.assert_allclose(expected_result, actual_result, rtol=1e-6) + + 
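+    # Informal note: the expected() helpers in the cases above all encode the
+    # same closed-form double-grad rules for out = matmul(x, y) with
+    # transpose_x = transpose_y = False:
+    #   first grad :  dx = dout @ y.T,            dy = x.T @ dout
+    #   second grad:  x_double_grad = dout @ ddy.T
+    #                 y_double_grad = ddx.T @ dout
+    #                 ddout = ddx @ y + x @ ddy
+    # Terms whose ddx/ddy (or first-order grad) is absent simply drop out; the
+    # 1-D and broadcast cases are the same formulas after matmul's implicit
+    # reshaping and reduction of the operands.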
# TODO(Ruting) test complex dtype when composite api support + """ + # case7: ddx is none, dims = 1, complex dtype + def test_matmul_double_grad_case7(self): + input_numpy_x = np.random.random([3]).astype( + 'float32' + ) + 1j * np.random.random([3]).astype('float32') + input_numpy_y = np.random.random([3]).astype( + 'float32' + ) + 1j * np.random.random([3]).astype('float32') + input_numpy_y_conj = np.conjugate(input_numpy_y) + + def actual(): + x = paddle.to_tensor( + input_numpy_x, stop_gradient=False, dtype='complex64' + ) + y = paddle.to_tensor( + input_numpy_y, stop_gradient=False, dtype='complex64' + ) + out = paddle.matmul(x, y, False, False) + + dout = paddle.to_tensor( + np.ones([1]), stop_gradient=False, dtype='complex64' + ) + (dx,) = paddle.grad( + [out], [x], [dout], retain_graph=True, create_graph=True + ) + ddx = paddle.to_tensor( + np.ones([3]), stop_gradient=False, dtype='complex64' + ) + # when y is not be differentiate in first grad, dx from second grad could be None in composite api. + dy_double_grad, ddout = paddle.grad( + [dx], + [y, dout], + [ddx], + retain_graph=True, + create_graph=True, + ) + return dy_double_grad, ddout + + def expected(): + dy_double_grad_expected = np.ones( + [3], dtype="float32" + ) + 0j * np.ones([3], dtype="float32") + ddout_expected = np.matmul( + input_numpy_y_conj, np.ones([3], dtype="float32") + ) + return ( + dy_double_grad_expected, + ddout_expected, + ) + + expected_results = expected() + places = ["cpu"] + if (paddle.is_compiled_with_cuda() or is_custom_device()): + places.append(get_device()) + for place in places: + paddle.device.set_device(place) + actual_results = actual() + for expected_result, actual_result in zip( + expected_results, actual_results + ): + np.testing.assert_allclose( + expected_result, actual_result, rtol=1e-6 + ) + + + # case8: ddy is none, dims = 1, complex dtype + def test_matmul_double_grad_case8(self): + input_numpy_x = np.random.random([3]).astype( + 'float32' + ) + 1j * np.random.random([3]).astype('float32') + input_numpy_y = np.random.random([3]).astype( + 'float32' + ) + 1j * np.random.random([3]).astype('float32') + input_numpy_x_conj = np.conjugate(input_numpy_x) + + def actual(): + x = paddle.to_tensor( + input_numpy_x, stop_gradient=False, dtype='complex64' + ) + y = paddle.to_tensor( + input_numpy_y, stop_gradient=False, dtype='complex64' + ) + out = paddle.matmul(x, y, False, False) + + dout = paddle.to_tensor( + np.ones([1]), stop_gradient=False, dtype='complex64' + ) + (dy,) = paddle.grad( + [out], [y], [dout], retain_graph=True, create_graph=True + ) + ddy = paddle.to_tensor( + np.ones([3]), stop_gradient=False, dtype='complex64' + ) + dx_double_grad, ddout = paddle.grad( + [dy], + [x, dout], + [ddy], + retain_graph=True, + create_graph=True, + ) + return dx_double_grad, ddout + + def expected(): + dx_double_grad_expected = np.ones([3], dtype="float32") + ddout_expected = np.matmul( + input_numpy_x_conj, np.ones([3], dtype="float32") + ) + return ( + dx_double_grad_expected, + ddout_expected, + ) + + expected_results = expected() + places = ["cpu"] + if (paddle.is_compiled_with_cuda() or is_custom_device()): + places.append(get_device()) + for place in places: + paddle.device.set_device(place) + actual_results = actual() + for expected_result, actual_result in zip( + expected_results, actual_results + ): + np.testing.assert_allclose( + expected_result, actual_result, rtol=1e-6 + ) + """ + + def test_value_error(self): + def test(): + import paddle + from paddle import nn + + model = 
nn.Sequential(nn.Linear(3, 4)) + + x = paddle.randn([4, 1]) + y = paddle.randn([4, 1]) + z = paddle.randn([4, 1]) + x.stop_gradient = False + y.stop_gradient = False + z.stop_gradient = False + out = model(paddle.concat((x, y, z), axis=1)) + + data = { + "x": x, + "y": y, + "z": z, + "u": out[:, 0:1], + "v": out[:, 1:2], + "w": out[:, 2:3], + "p": out[:, 3:4], + } + + v = out[:, 1:2] + z = paddle.grad(v, x, create_graph=True)[0] + zz = paddle.grad(z, x, create_graph=True)[0] + + with self.assertRaises(ValueError): + test() + + +if __name__ == "__main__": + unittest.main() diff --git a/backends/metax_gpu/tests/unit_test/test_linalg_matrix_exp_metax.py b/backends/metax_gpu/tests/unit_test/test_linalg_matrix_exp_metax.py new file mode 100644 index 00000000000..e39de09d6e4 --- /dev/null +++ b/backends/metax_gpu/tests/unit_test/test_linalg_matrix_exp_metax.py @@ -0,0 +1,268 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import unittest + +import numpy as np +import scipy +from op_test import get_places + +import paddle + +from paddle.base import core + +core.set_cublas_switch(False) + +os.environ["NVIDIA_TF32_OVERRIDE"] = "0" + +if sys.platform == "win32": + RTOL = {"float32": 1e-02, "float64": 1e-04} + ATOL = {"float32": 1e-02, "float64": 1e-04} +elif sys.platform == "darwin": + RTOL = {"float32": 1e-06, "float64": 1e-12} + ATOL = {"float32": 1e-06, "float64": 1e-12} +elif scipy.__version__ < "1.15": + RTOL = {"float32": 1e-06, "float64": 1e-15} + ATOL = {"float32": 1e-06, "float64": 1e-15} +else: + RTOL = {"float32": 1e-06, "float64": 1e-13} + ATOL = {"float32": 1e-06, "float64": 1e-13} + + +class MatrixExpTestCase(unittest.TestCase): + def setUp(self): + self.init_config() + self.generate_input() + self.generate_output() + self.places = get_places() + + def generate_input(self): + self._input_shape = (5, 5) + np.random.seed(123) + self._input_data = np.random.random(self._input_shape).astype(self.dtype) + + def generate_output(self): + self._output_data = scipy.linalg.expm(self._input_data) + + def init_config(self): + self.dtype = "float64" + + def test_dygraph(self): + for place in self.places: + paddle.disable_static(place) + x = paddle.to_tensor(self._input_data, place=place) + out = paddle.linalg.matrix_exp(x).numpy() + + np.testing.assert_allclose( + out, + self._output_data, + rtol=RTOL.get(self.dtype), + atol=ATOL.get(self.dtype), + ) + + # TODO(megemini): cond/while_loop should be tested in pir + # + def test_static(self): + paddle.enable_static() + + for place in get_places(): + with paddle.static.program_guard( + paddle.static.Program(), paddle.static.Program() + ): + x = paddle.static.data( + name="input", + shape=self._input_shape, + dtype=self._input_data.dtype, + ) + + out = paddle.linalg.matrix_exp(x) + exe = paddle.static.Executor(place) + + res = exe.run( + feed={"input": self._input_data}, + fetch_list=[out], + )[0] + + np.testing.assert_allclose( + res, + self._output_data, + 
rtol=RTOL.get(self.dtype), + atol=ATOL.get(self.dtype), + ) + + def test_grad(self): + for place in self.places: + x = paddle.to_tensor(self._input_data, place=place, stop_gradient=False) + out = paddle.linalg.matrix_exp(x) + out.backward() + x_grad = x.grad + + self.assertEqual(list(x_grad.shape), list(x.shape)) + self.assertEqual(x_grad.dtype, x.dtype) + + +class MatrixExpTestCaseFloat32(MatrixExpTestCase): + def init_config(self): + self.dtype = "float32" + + +class MatrixExpTestCase3D(MatrixExpTestCase): + def generate_input(self): + self._input_shape = (2, 5, 5) + np.random.seed(123) + self._input_data = np.random.random(self._input_shape).astype(self.dtype) + + +class MatrixExpTestCase3DFloat32(MatrixExpTestCase3D): + def init_config(self): + self.dtype = "float32" + + +class MatrixExpTestCase4D(MatrixExpTestCase): + def generate_input(self): + self._input_shape = (2, 3, 5, 5) + np.random.seed(123) + self._input_data = np.random.random(self._input_shape).astype(self.dtype) + + +class MatrixExpTestCase4DFloat32(MatrixExpTestCase4D): + def init_config(self): + self.dtype = "float32" + + +class MatrixExpTestCaseEmpty(MatrixExpTestCase): + def generate_input(self): + self._input_shape = () + np.random.seed(123) + self._input_data = np.random.random(self._input_shape).astype(self.dtype) + + +class MatrixExpTestCaseEmptyFloat32(MatrixExpTestCaseEmpty): + def init_config(self): + self.dtype = "float32" + + +class MatrixExpTestCaseScalar(MatrixExpTestCase): + def generate_input(self): + self._input_shape = (2, 3, 1, 1) + np.random.seed(123) + self._input_data = np.random.random(self._input_shape).astype(self.dtype) + + +class MatrixExpTestCaseScalarFloat32(MatrixExpTestCaseScalar): + def init_config(self): + self.dtype = "float32" + + +# test precision for float32 with l1_norm comparing `conds` +class MatrixExpTestCasePrecisionFloat32L1norm0(MatrixExpTestCase): + def init_config(self): + self.dtype = "float32" + + def generate_input(self): + self._input_shape = (2, 2) + self._input_data = np.array([[0, 0.2], [-0.2, 0]]).astype(self.dtype) + + +class MatrixExpTestCasePrecisionFloat32L1norm1(MatrixExpTestCase): + def init_config(self): + self.dtype = "float32" + + def generate_input(self): + self._input_shape = (2, 2) + self._input_data = np.array([[0, 0.8], [-0.8, 0]]).astype(self.dtype) + + +class MatrixExpTestCasePrecisionFloat32L1norm2(MatrixExpTestCase): + def init_config(self): + self.dtype = "float32" + + def generate_input(self): + self._input_shape = (2, 2) + self._input_data = np.array([[0, 2.0], [-2.0, 0]]).astype(self.dtype) + + +# test precision for float64 with l1_norm comparing `conds` +class MatrixExpTestCasePrecisionFloat64L1norm0(MatrixExpTestCase): + def init_config(self): + self.dtype = "float64" + + def generate_input(self): + self._input_shape = (2, 2) + self._input_data = np.array([[0, 0.01], [-0.01, 0]]).astype(self.dtype) + + +class MatrixExpTestCasePrecisionFloat64L1norm1(MatrixExpTestCase): + def init_config(self): + self.dtype = "float64" + + def generate_input(self): + self._input_shape = (2, 2) + self._input_data = np.array([[0, 0.1], [-0.1, 0]]).astype(self.dtype) + + +class MatrixExpTestCasePrecisionFloat64L1norm2(MatrixExpTestCase): + def init_config(self): + self.dtype = "float64" + + def generate_input(self): + self._input_shape = (2, 2) + self._input_data = np.array([[0, 0.5], [-0.5, 0]]).astype(self.dtype) + + +class MatrixExpTestCasePrecisionFloat64L1norm3(MatrixExpTestCase): + def init_config(self): + self.dtype = "float64" + + def generate_input(self): + 
self._input_shape = (2, 2) + self._input_data = np.array([[0, 1.5], [-1.5, 0]]).astype(self.dtype) + + +class MatrixExpTestCasePrecisionFloat64L1norm4(MatrixExpTestCase): + def init_config(self): + self.dtype = "float64" + + def generate_input(self): + self._input_shape = (2, 2) + self._input_data = np.array([[0, 2.5], [-2.5, 0]]).astype(self.dtype) + + +# test error cases +class MatrixExpTestCaseError(unittest.TestCase): + def test_error_dtype(self): + with self.assertRaises(ValueError): + x = np.array(123, dtype=int) + paddle.linalg.matrix_exp(x) + + def test_error_ndim(self): + # 1-d + with self.assertRaises(ValueError): + x = np.random.rand(1) + paddle.linalg.matrix_exp(x) + + # not square + with self.assertRaises(ValueError): + x = np.random.rand(3, 4) + paddle.linalg.matrix_exp(x) + + with self.assertRaises(ValueError): + x = np.random.rand(2, 3, 4) + paddle.linalg.matrix_exp(x) + + +if __name__ == "__main__": + unittest.main()
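+# Informal note: the MatrixExpTestCasePrecision* cases feed skew-symmetric inputs
+# of the form [[0, a], [-a, 0]], whose matrix exponential has the closed form of
+# a rotation matrix, so the scipy reference is easy to sanity-check by hand:
+#   expm([[0, a], [-a, 0]]) = [[cos(a), sin(a)], [-sin(a), cos(a)]]
+# For example, with a = 0.5:
+#   a = 0.5
+#   ref = np.array([[np.cos(a), np.sin(a)], [-np.sin(a), np.cos(a)]])
+#   np.testing.assert_allclose(
+#       scipy.linalg.expm(np.array([[0.0, a], [-a, 0.0]])), ref, rtol=1e-6
+#   )
+# Varying the magnitude of `a` changes the input's L1 norm, which is what selects
+# the different approximation branches (the `conds` thresholds) for each dtype.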