[Pten] move complex_functors.h #39558

Merged
4 changes: 4 additions & 0 deletions paddle/fluid/operators/CMakeLists.txt
@@ -1,5 +1,9 @@
include(operators)

# solve "math constants not defined" problems caused by the order of inclusion
# of <cmath> and the definition of macro _USE_MATH_DEFINES
add_definitions(-D_USE_MATH_DEFINES)

# clean cache and pybind_file content first when rebuild
unset(GLOB_OP_LIB CACHE)
unset(OP_LIBRARY CACHE)
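Context for the change above: on MSVC, `<cmath>` provides `M_PI` and the other math constants only when `_USE_MATH_DEFINES` is already defined at the point of its first inclusion, and with deeply nested headers that ordering is hard to guarantee per file, hence the global definition. A standalone sketch of the rule (illustration only, not Paddle code):

```cpp
// <cmath> on MSVC only defines M_PI and friends if _USE_MATH_DEFINES
// is visible before its *first* inclusion in the translation unit.
#define _USE_MATH_DEFINES  // must precede <cmath>; the global
                           // -D_USE_MATH_DEFINES makes this unnecessary
#include <cmath>
#include <cstdio>

int main() {
  // Without the macro, MSVC rejects this with
  // "error C2065: 'M_PI': undeclared identifier".
  std::printf("pi = %.17g\n", M_PI);
  return 0;
}
```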
87 changes: 6 additions & 81 deletions paddle/fluid/operators/angle_op.h
@@ -17,7 +17,7 @@
#define _USE_MATH_DEFINES
#endif
#include <cmath>
#include "paddle/fluid/operators/math/complex_functors.h"
#include "paddle/pten/kernels/funcs/complex_functors.h"

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
@@ -26,81 +26,6 @@
namespace paddle {
namespace operators {

-namespace math {
-template <typename T, typename Enable = void>
-struct AngleFunctor;
-
-// angel function for complex
-template <typename T>
-struct AngleFunctor<T, Complex<T, Real<T>>> {
-AngleFunctor(const T* input, Real<T>* output, int64_t numel)
-: input_(input), output_(output), numel_(numel) {}
-
-HOSTDEVICE void operator()(int64_t idx) const {
-output_[idx] = arg(input_[idx]);
-}
-
-const T* input_;
-Real<T>* output_;
-int64_t numel_;
-};
-
-// angel function for real
-template <typename T>
-struct AngleFunctor<T, NoComplex<T, Real<T>>> {
-AngleFunctor(const T* input, T* output, int64_t numel)
-: input_(input), output_(output), numel_(numel) {}
-
-HOSTDEVICE void operator()(int64_t idx) const {
-output_[idx] = input_[idx] < static_cast<T>(0) ? M_PI : 0;
-}
-
-const T* input_;
-T* output_;
-int64_t numel_;
-};
-
-template <typename T, typename Enable = void>
-struct AngleGradFunctor;
-
-// angle grad for complex
-template <typename T>
-struct AngleGradFunctor<T, Complex<T, Real<T>>> {
-AngleGradFunctor(const math::Real<T>* dout, const T* x, T* dx, int64_t numel)
-: dout_(dout), x_(x), dx_(dx), numel_(numel) {}
-
-HOSTDEVICE void operator()(int64_t idx) const {
-if (x_[idx] == T(0)) {
-dx_[idx] = T(0);
-} else {
-const math::Real<T> r_square =
-x_[idx].real * x_[idx].real + x_[idx].imag * x_[idx].imag;
-dx_[idx] = T(-dout_[idx] * x_[idx].imag / r_square,
-dout_[idx] * x_[idx].real / r_square);
-}
-}
-
-const math::Real<T>* dout_;
-const T* x_;
-T* dx_;
-int64_t numel_;
-};
-
-// angle grad for real
-template <typename T>
-struct AngleGradFunctor<T, NoComplex<T, Real<T>>> {
-AngleGradFunctor(const math::Real<T>* dout, const T* x, T* dx, int64_t numel)
-: dout_(dout), x_(x), dx_(dx), numel_(numel) {}
-
-HOSTDEVICE void operator()(int64_t idx) const { dx_[idx] = 0; }
-
-const math::Real<T>* dout_;
-const T* x_;
-T* dx_;
-int64_t numel_;
-};
-} // namespace math
-
using Tensor = framework::Tensor;
template <typename DeviceContext, typename T>
class AngleKernel : public framework::OpKernel<T> {
@@ -111,12 +36,12 @@ class AngleKernel : public framework::OpKernel<T> {

auto numel = x->numel();
auto* x_data = x->data<T>();
-auto* out_data = out->mutable_data<math::Real<T>>(
-context.GetPlace(), size_t(x->numel() * sizeof(math::Real<T>)));
+auto* out_data = out->mutable_data<pten::funcs::Real<T>>(
+context.GetPlace(), size_t(x->numel() * sizeof(pten::funcs::Real<T>)));

auto& dev_ctx = context.template device_context<DeviceContext>();
platform::ForRange<DeviceContext> for_range(dev_ctx, numel);
-math::AngleFunctor<T> functor(x_data, out_data, numel);
+pten::funcs::AngleFunctor<T> functor(x_data, out_data, numel);
for_range(functor);
}
};
@@ -132,14 +57,14 @@ class AngleGradKernel : public framework::OpKernel<T> {
ctx.Output<framework::Tensor>(framework::GradVarName("X"));

auto numel = d_out->numel();
-auto* dout_data = d_out->data<math::Real<T>>();
+auto* dout_data = d_out->data<pten::funcs::Real<T>>();
auto* x_data = x->data<T>();
auto* dx_data = d_x->mutable_data<T>(
ctx.GetPlace(), static_cast<size_t>(numel * sizeof(T)));

auto& dev_ctx = ctx.template device_context<DeviceContext>();
platform::ForRange<DeviceContext> for_range(dev_ctx, numel);
-math::AngleGradFunctor<T> functor(dout_data, x_data, dx_data, numel);
+pten::funcs::AngleGradFunctor<T> functor(dout_data, x_data, dx_data, numel);
for_range(functor);
}
};
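Background on the block deleted above: `AngleFunctor`/`AngleGradFunctor` are specialized via SFINAE on whether `T` is a complex type, and `platform::ForRange` applies them elementwise; the PR moves them to `paddle/pten/kernels/funcs/complex_functors.h` rather than deleting them. A condensed, self-contained sketch of that dispatch pattern, using hypothetical names (`RealOf`, `AngleSketch`) rather than pten's actual traits:

```cpp
#include <cmath>
#include <complex>
#include <cstdint>
#include <cstdio>
#include <type_traits>

#ifndef M_PI  // see the CMakeLists.txt change above
#define M_PI 3.14159265358979323846
#endif

// Maps std::complex<R> to R and leaves real types unchanged.
template <typename T> struct RealOf { using type = T; };
template <typename R> struct RealOf<std::complex<R>> { using type = R; };
template <typename T> using RealT = typename RealOf<T>::type;

template <typename T, typename Enable = void>
struct AngleSketch;

// Selected when T is complex: the angle is the complex argument.
template <typename T>
struct AngleSketch<
    T, typename std::enable_if<!std::is_same<T, RealT<T>>::value>::type> {
  void operator()(const T* in, RealT<T>* out, int64_t n) const {
    for (int64_t i = 0; i < n; ++i) out[i] = std::arg(in[i]);
  }
};

// Selected when T is real: pi for negative inputs, 0 otherwise.
template <typename T>
struct AngleSketch<
    T, typename std::enable_if<std::is_same<T, RealT<T>>::value>::type> {
  void operator()(const T* in, T* out, int64_t n) const {
    for (int64_t i = 0; i < n; ++i) out[i] = in[i] < T(0) ? T(M_PI) : T(0);
  }
};

int main() {
  const std::complex<float> z[1] = {{-1.f, 0.f}};
  const float r[1] = {-2.f};
  float a[1], b[1];
  AngleSketch<std::complex<float>>()(z, a, 1);  // a[0] == pi
  AngleSketch<float>()(r, b, 1);                // b[0] == pi
  std::printf("%f %f\n", a[0], b[0]);
  return 0;
}
```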
10 changes: 5 additions & 5 deletions paddle/fluid/operators/cholesky_solve_op.h
@@ -64,7 +64,7 @@ void cholesky_solve_fn(const paddle::framework::ExecutionContext &ctx,
// calculate u's conjugate for complex
framework::Tensor u_conj(u_bst.type());
platform::ForRange<DeviceContext> u_for_range(dev_ctx, u_bst.numel());
-math::ConjFunctor<T> u_functor(
+pten::funcs::ConjFunctor<T> u_functor(
u_bst.data<T>(), u_bst.numel(),
u_conj.mutable_data<T>(u_bst.dims(), dev_ctx.GetPlace()));
u_for_range(u_functor);
@@ -73,7 +73,7 @@ void cholesky_solve_fn(const paddle::framework::ExecutionContext &ctx,
// calculate b's conjugate for complex
framework::Tensor b_conj(b_bst.type());
platform::ForRange<DeviceContext> b_for_range(dev_ctx, b_bst.numel());
-math::ConjFunctor<T> b_functor(
+pten::funcs::ConjFunctor<T> b_functor(
b_bst.data<T>(), b_bst.numel(),
b_conj.mutable_data<T>(b_bst.dims(), dev_ctx.GetPlace()));
b_for_range(b_functor);
@@ -113,7 +113,7 @@ void cholesky_solve_fn(const paddle::framework::ExecutionContext &ctx,

// calculate out's conjugate for complex
platform::ForRange<DeviceContext> out_for_range(dev_ctx, out->numel());
-math::ConjFunctor<T> out_functor(
+pten::funcs::ConjFunctor<T> out_functor(
out->data<T>(), out->numel(),
out->mutable_data<T>(out->dims(), dev_ctx.GetPlace()));
out_for_range(out_functor);
@@ -173,7 +173,7 @@ class CholeskySolveGradKernel : public framework::OpKernel<T> {
// calculate out's conjugate for complex
framework::Tensor out_conj(out->type());
platform::ForRange<DeviceContext> out_for_range(dev_ctx, out->numel());
-math::ConjFunctor<T> out_functor(
+pten::funcs::ConjFunctor<T> out_functor(
out->data<T>(), out->numel(),
out_conj.mutable_data<T>(out->dims(), dev_ctx.GetPlace()));
out_for_range(out_functor);
@@ -195,7 +195,7 @@ class CholeskySolveGradKernel : public framework::OpKernel<T> {
framework::Tensor commonterm_conj(commonterm.type());
platform::ForRange<DeviceContext> commonterm_for_range(
dev_ctx, commonterm.numel());
-math::ConjFunctor<T> commonterm_functor(
+pten::funcs::ConjFunctor<T> commonterm_functor(
commonterm.data<T>(), commonterm.numel(),
commonterm_conj.mutable_data<T>(commonterm.dims(),
dev_ctx.GetPlace()));
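Every `ConjFunctor` use in this file has the same shape: conjugate `numel` elements from an input buffer into a freshly allocated output, then hand the functor to `ForRange`. A minimal standalone sketch, assuming `std::complex` in place of `paddle::platform::complex`:

```cpp
// Minimal stand-in for pten::funcs::ConjFunctor (same argument order
// as the calls above: input, numel, output); the real functor also
// has a pass-through specialization for non-complex element types.
#include <complex>
#include <cstdint>
#include <vector>

template <typename T>
struct ConjSketch {
  ConjSketch(const std::complex<T>* in, int64_t numel, std::complex<T>* out)
      : in_(in), numel_(numel), out_(out) {}
  void operator()(int64_t idx) const { out_[idx] = std::conj(in_[idx]); }
  const std::complex<T>* in_;
  int64_t numel_;
  std::complex<T>* out_;
};

int main() {
  std::vector<std::complex<float>> u{{1.f, 2.f}, {3.f, -4.f}};
  std::vector<std::complex<float>> u_conj(u.size());
  const int64_t n = static_cast<int64_t>(u.size());
  ConjSketch<float> functor(u.data(), n, u_conj.data());
  // platform::ForRange dispatches this per-element on CPU or GPU;
  // a serial loop is the CPU equivalent.
  for (int64_t i = 0; i < n; ++i) functor(i);
  return 0;  // u_conj == {1-2i, 3+4i}
}
```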
2 changes: 1 addition & 1 deletion paddle/fluid/operators/complex_op.h
@@ -16,8 +16,8 @@ limitations under the License. */

#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
#include "paddle/fluid/operators/math/complex_functors.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/pten/kernels/funcs/complex_functors.h"

namespace paddle {
namespace operators {
2 changes: 1 addition & 1 deletion paddle/fluid/operators/complex_view_op.h
@@ -17,9 +17,9 @@
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/math/complex_functors.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/pten/kernels/funcs/complex_functors.h"

namespace paddle {
namespace operators {
6 changes: 3 additions & 3 deletions paddle/fluid/operators/cumprod_op.cu
@@ -14,9 +14,9 @@

#include <thrust/transform.h>
#include "paddle/fluid/operators/cumprod_op.h"
#include "paddle/fluid/operators/math/complex_functors.h"
#include "paddle/fluid/operators/math/inclusive_scan.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/pten/kernels/funcs/complex_functors.h"

namespace paddle {
namespace operators {
@@ -243,12 +243,12 @@ class CumprodGradOpCUDAKernel : public framework::OpKernel<T> {

platform::ForRange<platform::CUDADeviceContext> for_range_x(dev_ctx,
numel);
-math::ConjFunctor<T> functor_x(x_data, numel, x_data_conj);
+pten::funcs::ConjFunctor<T> functor_x(x_data, numel, x_data_conj);
for_range_x(functor_x);

platform::ForRange<platform::CUDADeviceContext> for_range_y(dev_ctx,
numel);
-math::ConjFunctor<T> functor_y(y_data, numel, y_data_conj);
+pten::funcs::ConjFunctor<T> functor_y(y_data, numel, y_data_conj);
for_range_y(functor_y);
x_data_deal = x_data_conj;
y_data_deal = y_data_conj;
6 changes: 3 additions & 3 deletions paddle/fluid/operators/cumprod_op.h
@@ -18,8 +18,8 @@
#include <type_traits>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/math/complex_functors.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/pten/kernels/funcs/complex_functors.h"

namespace paddle {
namespace operators {
@@ -124,12 +124,12 @@ class CumprodGradOpCPUKernel : public framework::OpKernel<T> {

platform::ForRange<platform::CPUDeviceContext> for_range_x(dev_ctx,
numel);
-math::ConjFunctor<T> functor_x(x_data, numel, x_data_conj);
+pten::funcs::ConjFunctor<T> functor_x(x_data, numel, x_data_conj);
for_range_x(functor_x);

platform::ForRange<platform::CPUDeviceContext> for_range_out(dev_ctx,
numel);
-math::ConjFunctor<T> functor_out(out_data, numel, out_data_conj);
+pten::funcs::ConjFunctor<T> functor_out(out_data, numel, out_data_conj);
for_range_out(functor_out);

x_data_deal = x_data_conj;
4 changes: 2 additions & 2 deletions paddle/fluid/operators/determinant_op.h
@@ -19,11 +19,11 @@
#include <cmath>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/complex_functors.h"
#include "paddle/fluid/operators/math/matrix_inverse.h"
#include "paddle/fluid/operators/svd_helper.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/pten/kernels/funcs/complex_functors.h"

namespace paddle {
namespace operators {
@@ -395,7 +395,7 @@ class SlogDeterminantGradKernel : public framework::OpKernel<T> {
size_t(numel * sizeof(T)));

platform::ForRange<DeviceContext> for_range(dev_ctx, numel);
-math::ConjFunctor<T> functor(inverse_A.data<T>(), numel, conj_data);
+pten::funcs::ConjFunctor<T> functor(inverse_A.data<T>(), numel, conj_data);
for_range(functor);

VLOG(3) << "inverse(A).conj() dims: " << conj_inverse_A.dims();
2 changes: 1 addition & 1 deletion paddle/fluid/operators/dot_op.h
@@ -16,8 +16,8 @@

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/math/complex_functors.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/pten/kernels/funcs/complex_functors.h"

// only can include the headers in paddle/pten/api dirs
#include "paddle/pten/api/lib/utils/tensor_utils.h"
42 changes: 22 additions & 20 deletions paddle/fluid/operators/eig_op.h
@@ -17,12 +17,12 @@
#include <math.h>
#include <algorithm>
#include <complex>
#include "paddle/fluid/operators/math/complex_functors.h"
#include "paddle/fluid/operators/math/lapack_function.h"
#include "paddle/fluid/operators/math/matrix_solve.h"
#include "paddle/fluid/operators/svd_helper.h"
#include "paddle/fluid/operators/transpose_op.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/pten/kernels/funcs/complex_functors.h"
#include "paddle/pten/kernels/funcs/math_function.h"
#define EPSILON 1e-6

@@ -87,18 +87,19 @@ void LapackEig(Tensor* input, Tensor* values, Tensor* vectors, int info,
int values_stride = values->dims()[values->dims().size() - 1];

Tensor rwork;
-math::Real<T>* rwork_data = nullptr;
+pten::funcs::Real<T>* rwork_data = nullptr;

rwork.Resize(framework::make_ddim({lda * 2}));
-rwork_data = rwork.mutable_data<math::Real<T>>(context.GetPlace());
+rwork_data = rwork.mutable_data<pten::funcs::Real<T>>(context.GetPlace());

// call lapackEig once to compute the size of work;
T computed_work_size;
-math::lapackEig<T, math::Real<T>>(
+math::lapackEig<T, pten::funcs::Real<T>>(
jobvl, jobvr, order, input_data, lda, values_data, lvector_data, ldvl,
rvector_data, ldvr, &computed_work_size, lwork, rwork_data, &info);

-lwork = std::max<int>(1, static_cast<int>(math::Real<T>(computed_work_size)));
+lwork = std::max<int>(
+1, static_cast<int>(pten::funcs::Real<T>(computed_work_size)));
Tensor work;
work.Resize(framework::make_ddim({lwork}));
T* work_data = work.mutable_data<T>(context.GetPlace());
@@ -108,7 +109,7 @@
T* current_values = &values_data[i * values_stride];
T* current_rvectors = &rvector_data[i * matrix_stride];

-math::lapackEig<T, math::Real<T>>(
+math::lapackEig<T, pten::funcs::Real<T>>(
jobvl, jobvr, order, current_matrix, lda, current_values, lvector_data,
ldvl, current_rvectors, ldvr, work_data, lwork, rwork_data, &info);
PADDLE_ENFORCE_EQ(
@@ -207,26 +208,27 @@ class EigKernel : public framework::OpKernel<T> {
origin_dim.push_back(last_item * 2);
framework::DDim big_dim = framework::make_ddim(origin_dim);

-real_values.mutable_data<math::Real<T>>(big_dim, context.GetPlace());
-real_vectors.mutable_data<math::Real<T>>(x->dims(), context.GetPlace());
+real_values.mutable_data<pten::funcs::Real<T>>(big_dim,
+context.GetPlace());
+real_vectors.mutable_data<pten::funcs::Real<T>>(x->dims(),
+context.GetPlace());

-ApplyEigKernel<DeviceContext, math::Real<T>>(*x, &real_values,
-&real_vectors, context);
-auto dito =
-math::DeviceIndependenceTensorOperations<DeviceContext, math::Real<T>,
-Tout>(context);
+ApplyEigKernel<DeviceContext, pten::funcs::Real<T>>(
+*x, &real_values, &real_vectors, context);
+auto dito = math::DeviceIndependenceTensorOperations<
+DeviceContext, pten::funcs::Real<T>, Tout>(context);

// 1. extract real part & imag part from real_values
Tensor real_part = dito.Slice(real_values, {-1}, {0}, {order});
Tensor imag_part = dito.Slice(real_values, {-1}, {order}, {order * 2});

// 2. construct complex values
-auto* real_part_data = real_part.data<math::Real<T>>();
-auto* imag_part_data = imag_part.data<math::Real<T>>();
+auto* real_part_data = real_part.data<pten::funcs::Real<T>>();
+auto* imag_part_data = imag_part.data<pten::funcs::Real<T>>();
int out_values_numel = out_values->numel();
platform::ForRange<DeviceContext> for_range(
context.template device_context<DeviceContext>(), out_values_numel);
-math::RealImagToComplexFunctor<Tout> functor(
+pten::funcs::RealImagToComplexFunctor<Tout> functor(
real_part_data, imag_part_data,
out_values->mutable_data<Tout>(context.GetPlace()), out_values_numel);
for_range(functor);
Expand All @@ -235,7 +237,7 @@ class EigKernel : public framework::OpKernel<T> {
Tensor real_vector_trans = dito.Transpose(real_vectors);
Tensor out_vectors_trans;
out_vectors_trans.mutable_data<Tout>(x->dims(), context.GetPlace());
-ConstructComplexVectors<math::Real<T>, Tout>(
+ConstructComplexVectors<pten::funcs::Real<T>, Tout>(
&out_vectors_trans, *out_values, real_vector_trans, context,
batch_count, order);
TransposeTwoAxis<DeviceContext, Tout>(out_vectors_trans, out_vectors,
@@ -271,14 +273,14 @@ void ComputeBackwardForComplexInput(
// turn diag_unsqueezed into complex
auto numel = diag_unsqueezed.numel();
Tensor diag_unsqueezed_complex;
-auto* data_diag_un = diag_unsqueezed.data<math::Real<Tout>>();
+auto* data_diag_un = diag_unsqueezed.data<pten::funcs::Real<Tout>>();
auto* data_diag_un_com = diag_unsqueezed_complex.mutable_data<Tout>(
diag_unsqueezed.dims(), context.GetPlace(),
static_cast<size_t>(numel * sizeof(Tout)));
auto& dev_ctx = context.template device_context<DeviceContext>();
platform::ForRange<DeviceContext> for_range(dev_ctx, numel);
-math::RealToComplexFunctor<Tout> functor(data_diag_un, data_diag_un_com,
-numel);
+pten::funcs::RealToComplexFunctor<Tout> functor(data_diag_un,
+data_diag_un_com, numel);
for_range(functor);
// real tensor multiply complex tensor in broadcast manner
Tensor res1 = dito.RealMulComplex(V, diag_unsqueezed_complex);
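The two `lapackEig` calls in `LapackEig` above follow the standard LAPACK workspace query: the first call only reports the optimal size of the work array in `computed_work_size`, and the second performs the actual decomposition with a buffer of that size. A sketch of the same pattern (assumptions: raw Fortran `sgeev` symbol from a linked LAPACK; Paddle instead goes through `math::lapackEig`, and error handling is elided here):

```cpp
#include <vector>

extern "C" void sgeev_(const char* jobvl, const char* jobvr, const int* n,
                       float* a, const int* lda, float* wr, float* wi,
                       float* vl, const int* ldvl, float* vr, const int* ldvr,
                       float* work, const int* lwork, int* info);

// Computes eigenvalues (wr, wi) and right eigenvectors (vr) of the
// n x n column-major matrix a, overwriting a in the process.
void EigWithWorkspaceQuery(float* a, int n, float* wr, float* wi, float* vr) {
  int lda = n, ldvl = 1, ldvr = n, info = 0;
  // First call: lwork = -1 asks sgeev to report the optimal workspace
  // size in work[0] without doing any computation.
  float work_query = 0.f;
  int lwork = -1;
  sgeev_("N", "V", &n, a, &lda, wr, wi, nullptr, &ldvl, vr, &ldvr,
         &work_query, &lwork, &info);
  // Second call: allocate the reported amount and do the real work.
  lwork = static_cast<int>(work_query);
  std::vector<float> work(static_cast<size_t>(lwork));
  sgeev_("N", "V", &n, a, &lda, wr, wi, nullptr, &ldvl, vr, &ldvr,
         work.data(), &lwork, &info);
}
```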