[Pten] move complex_functors.h #39558

Merged
4 changes: 4 additions & 0 deletions paddle/fluid/operators/CMakeLists.txt
@@ -1,5 +1,9 @@
include(operators)

# solve "math constants not defined" problems caused by the order of inclusion
# of <cmath> and the definition of macro _USE_MATH_DEFINES
add_definitions(-D_USE_MATH_DEFINES)

# clean cache and pybind_file content first when rebuild
unset(GLOB_OP_LIB CACHE)
unset(OP_LIBRARY CACHE)
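Context for the change above: on MSVC, `<cmath>` provides `M_PI` and the other math constants only when `_USE_MATH_DEFINES` is already defined at the point of its first inclusion, and with deeply nested headers that ordering is hard to guarantee per file, hence the global definition. A standalone sketch of the rule (illustration only, not Paddle code):

```cpp
// <cmath> on MSVC only defines M_PI and friends if _USE_MATH_DEFINES
// is visible before its *first* inclusion in the translation unit.
#define _USE_MATH_DEFINES  // must precede <cmath>; the global
                           // -D_USE_MATH_DEFINES makes this unnecessary
#include <cmath>
#include <cstdio>

int main() {
  // Without the macro, MSVC rejects this with
  // "error C2065: 'M_PI': undeclared identifier".
  std::printf("pi = %.17g\n", M_PI);
  return 0;
}
```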
87 changes: 6 additions & 81 deletions paddle/fluid/operators/angle_op.h
@@ -17,7 +17,7 @@
#define _USE_MATH_DEFINES
#endif
#include <cmath>
#include "paddle/fluid/operators/math/complex_functors.h"
#include "paddle/pten/kernels/funcs/complex_functors.h"

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
@@ -26,81 +26,6 @@
namespace paddle {
namespace operators {

-namespace math {
-template <typename T, typename Enable = void>
-struct AngleFunctor;
-
-// angel function for complex
-template <typename T>
-struct AngleFunctor<T, Complex<T, Real<T>>> {
-AngleFunctor(const T* input, Real<T>* output, int64_t numel)
-: input_(input), output_(output), numel_(numel) {}
-
-HOSTDEVICE void operator()(int64_t idx) const {
-output_[idx] = arg(input_[idx]);
-}
-
-const T* input_;
-Real<T>* output_;
-int64_t numel_;
-};
-
-// angel function for real
-template <typename T>
-struct AngleFunctor<T, NoComplex<T, Real<T>>> {
-AngleFunctor(const T* input, T* output, int64_t numel)
-: input_(input), output_(output), numel_(numel) {}
-
-HOSTDEVICE void operator()(int64_t idx) const {
-output_[idx] = input_[idx] < static_cast<T>(0) ? M_PI : 0;
-}
-
-const T* input_;
-T* output_;
-int64_t numel_;
-};
-
-template <typename T, typename Enable = void>
-struct AngleGradFunctor;
-
-// angle grad for complex
-template <typename T>
-struct AngleGradFunctor<T, Complex<T, Real<T>>> {
-AngleGradFunctor(const math::Real<T>* dout, const T* x, T* dx, int64_t numel)
-: dout_(dout), x_(x), dx_(dx), numel_(numel) {}
-
-HOSTDEVICE void operator()(int64_t idx) const {
-if (x_[idx] == T(0)) {
-dx_[idx] = T(0);
-} else {
-const math::Real<T> r_square =
-x_[idx].real * x_[idx].real + x_[idx].imag * x_[idx].imag;
-dx_[idx] = T(-dout_[idx] * x_[idx].imag / r_square,
-dout_[idx] * x_[idx].real / r_square);
-}
-}
-
-const math::Real<T>* dout_;
-const T* x_;
-T* dx_;
-int64_t numel_;
-};
-
-// angle grad for real
-template <typename T>
-struct AngleGradFunctor<T, NoComplex<T, Real<T>>> {
-AngleGradFunctor(const math::Real<T>* dout, const T* x, T* dx, int64_t numel)
-: dout_(dout), x_(x), dx_(dx), numel_(numel) {}
-
-HOSTDEVICE void operator()(int64_t idx) const { dx_[idx] = 0; }
-
-const math::Real<T>* dout_;
-const T* x_;
-T* dx_;
-int64_t numel_;
-};
-} // namespace math
-
using Tensor = framework::Tensor;
template <typename DeviceContext, typename T>
class AngleKernel : public framework::OpKernel<T> {
@@ -111,12 +36,12 @@ class AngleKernel : public framework::OpKernel<T> {

auto numel = x->numel();
auto* x_data = x->data<T>();
-auto* out_data = out->mutable_data<math::Real<T>>(
-context.GetPlace(), size_t(x->numel() * sizeof(math::Real<T>)));
+auto* out_data = out->mutable_data<pten::funcs::Real<T>>(
+context.GetPlace(), size_t(x->numel() * sizeof(pten::funcs::Real<T>)));

auto& dev_ctx = context.template device_context<DeviceContext>();
platform::ForRange<DeviceContext> for_range(dev_ctx, numel);
-math::AngleFunctor<T> functor(x_data, out_data, numel);
+pten::funcs::AngleFunctor<T> functor(x_data, out_data, numel);
for_range(functor);
}
};
@@ -132,14 +57,14 @@ class AngleGradKernel : public framework::OpKernel<T> {
ctx.Output<framework::Tensor>(framework::GradVarName("X"));

auto numel = d_out->numel();
-auto* dout_data = d_out->data<math::Real<T>>();
+auto* dout_data = d_out->data<pten::funcs::Real<T>>();
auto* x_data = x->data<T>();
auto* dx_data = d_x->mutable_data<T>(
ctx.GetPlace(), static_cast<size_t>(numel * sizeof(T)));

auto& dev_ctx = ctx.template device_context<DeviceContext>();
platform::ForRange<DeviceContext> for_range(dev_ctx, numel);
-math::AngleGradFunctor<T> functor(dout_data, x_data, dx_data, numel);
+pten::funcs::AngleGradFunctor<T> functor(dout_data, x_data, dx_data, numel);
for_range(functor);
}
};
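Background on the block deleted above: `AngleFunctor`/`AngleGradFunctor` are specialized via SFINAE on whether `T` is a complex type, and `platform::ForRange` applies them elementwise; the PR moves them to `paddle/pten/kernels/funcs/complex_functors.h` rather than deleting them. A condensed, self-contained sketch of that dispatch pattern, using hypothetical names (`RealOf`, `AngleSketch`) rather than pten's actual traits:

```cpp
#include <cmath>
#include <complex>
#include <cstdint>
#include <cstdio>
#include <type_traits>

#ifndef M_PI  // see the CMakeLists.txt change above
#define M_PI 3.14159265358979323846
#endif

// Maps std::complex<R> to R and leaves real types unchanged.
template <typename T> struct RealOf { using type = T; };
template <typename R> struct RealOf<std::complex<R>> { using type = R; };
template <typename T> using RealT = typename RealOf<T>::type;

template <typename T, typename Enable = void>
struct AngleSketch;

// Selected when T is complex: the angle is the complex argument.
template <typename T>
struct AngleSketch<
    T, typename std::enable_if<!std::is_same<T, RealT<T>>::value>::type> {
  void operator()(const T* in, RealT<T>* out, int64_t n) const {
    for (int64_t i = 0; i < n; ++i) out[i] = std::arg(in[i]);
  }
};

// Selected when T is real: pi for negative inputs, 0 otherwise.
template <typename T>
struct AngleSketch<
    T, typename std::enable_if<std::is_same<T, RealT<T>>::value>::type> {
  void operator()(const T* in, T* out, int64_t n) const {
    for (int64_t i = 0; i < n; ++i) out[i] = in[i] < T(0) ? T(M_PI) : T(0);
  }
};

int main() {
  const std::complex<float> z[1] = {{-1.f, 0.f}};
  const float r[1] = {-2.f};
  float a[1], b[1];
  AngleSketch<std::complex<float>>()(z, a, 1);  // a[0] == pi
  AngleSketch<float>()(r, b, 1);                // b[0] == pi
  std::printf("%f %f\n", a[0], b[0]);
  return 0;
}
```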
10 changes: 5 additions & 5 deletions paddle/fluid/operators/cholesky_solve_op.h
@@ -64,7 +64,7 @@ void cholesky_solve_fn(const paddle::framework::ExecutionContext &ctx,
// calculate u's conjugate for complex
framework::Tensor u_conj(u_bst.type());
platform::ForRange<DeviceContext> u_for_range(dev_ctx, u_bst.numel());
-math::ConjFunctor<T> u_functor(
+pten::funcs::ConjFunctor<T> u_functor(
u_bst.data<T>(), u_bst.numel(),
u_conj.mutable_data<T>(u_bst.dims(), dev_ctx.GetPlace()));
u_for_range(u_functor);
@@ -73,7 +73,7 @@ void cholesky_solve_fn(const paddle::framework::ExecutionContext &ctx,
// calculate b's conjugate for complex
framework::Tensor b_conj(b_bst.type());
platform::ForRange<DeviceContext> b_for_range(dev_ctx, b_bst.numel());
-math::ConjFunctor<T> b_functor(
+pten::funcs::ConjFunctor<T> b_functor(
b_bst.data<T>(), b_bst.numel(),
b_conj.mutable_data<T>(b_bst.dims(), dev_ctx.GetPlace()));
b_for_range(b_functor);
@@ -113,7 +113,7 @@ void cholesky_solve_fn(const paddle::framework::ExecutionContext &ctx,

// calculate out's conjugate for complex
platform::ForRange<DeviceContext> out_for_range(dev_ctx, out->numel());
-math::ConjFunctor<T> out_functor(
+pten::funcs::ConjFunctor<T> out_functor(
out->data<T>(), out->numel(),
out->mutable_data<T>(out->dims(), dev_ctx.GetPlace()));
out_for_range(out_functor);
@@ -173,7 +173,7 @@ class CholeskySolveGradKernel : public framework::OpKernel<T> {
// calculate out's conjugate for complex
framework::Tensor out_conj(out->type());
platform::ForRange<DeviceContext> out_for_range(dev_ctx, out->numel());
-math::ConjFunctor<T> out_functor(
+pten::funcs::ConjFunctor<T> out_functor(
out->data<T>(), out->numel(),
out_conj.mutable_data<T>(out->dims(), dev_ctx.GetPlace()));
out_for_range(out_functor);
@@ -195,7 +195,7 @@ class CholeskySolveGradKernel : public framework::OpKernel<T> {
framework::Tensor commonterm_conj(commonterm.type());
platform::ForRange<DeviceContext> commonterm_for_range(
dev_ctx, commonterm.numel());
-math::ConjFunctor<T> commonterm_functor(
+pten::funcs::ConjFunctor<T> commonterm_functor(
commonterm.data<T>(), commonterm.numel(),
commonterm_conj.mutable_data<T>(commonterm.dims(),
dev_ctx.GetPlace()));
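Every `ConjFunctor` use in this file has the same shape: conjugate `numel` elements from an input buffer into a freshly allocated output, then hand the functor to `ForRange`. A minimal standalone sketch, assuming `std::complex` in place of `paddle::platform::complex`:

```cpp
// Minimal stand-in for pten::funcs::ConjFunctor (same argument order
// as the calls above: input, numel, output); the real functor also
// has a pass-through specialization for non-complex element types.
#include <complex>
#include <cstdint>
#include <vector>

template <typename T>
struct ConjSketch {
  ConjSketch(const std::complex<T>* in, int64_t numel, std::complex<T>* out)
      : in_(in), numel_(numel), out_(out) {}
  void operator()(int64_t idx) const { out_[idx] = std::conj(in_[idx]); }
  const std::complex<T>* in_;
  int64_t numel_;
  std::complex<T>* out_;
};

int main() {
  std::vector<std::complex<float>> u{{1.f, 2.f}, {3.f, -4.f}};
  std::vector<std::complex<float>> u_conj(u.size());
  const int64_t n = static_cast<int64_t>(u.size());
  ConjSketch<float> functor(u.data(), n, u_conj.data());
  // platform::ForRange dispatches this per-element on CPU or GPU;
  // a serial loop is the CPU equivalent.
  for (int64_t i = 0; i < n; ++i) functor(i);
  return 0;  // u_conj == {1-2i, 3+4i}
}
```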
2 changes: 1 addition & 1 deletion paddle/fluid/operators/complex_op.h
@@ -16,8 +16,8 @@ limitations under the License. */

#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
#include "paddle/fluid/operators/math/complex_functors.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/pten/kernels/funcs/complex_functors.h"

namespace paddle {
namespace operators {
2 changes: 1 addition & 1 deletion paddle/fluid/operators/complex_view_op.h
@@ -17,9 +17,9 @@
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/math/complex_functors.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/pten/kernels/funcs/complex_functors.h"

namespace paddle {
namespace operators {
6 changes: 3 additions & 3 deletions paddle/fluid/operators/cumprod_op.cu
@@ -14,9 +14,9 @@

#include <thrust/transform.h>
#include "paddle/fluid/operators/cumprod_op.h"
#include "paddle/fluid/operators/math/complex_functors.h"
#include "paddle/fluid/operators/math/inclusive_scan.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/pten/kernels/funcs/complex_functors.h"

namespace paddle {
namespace operators {
@@ -243,12 +243,12 @@ class CumprodGradOpCUDAKernel : public framework::OpKernel<T> {

platform::ForRange<platform::CUDADeviceContext> for_range_x(dev_ctx,
numel);
-math::ConjFunctor<T> functor_x(x_data, numel, x_data_conj);
+pten::funcs::ConjFunctor<T> functor_x(x_data, numel, x_data_conj);
for_range_x(functor_x);

platform::ForRange<platform::CUDADeviceContext> for_range_y(dev_ctx,
numel);
-math::ConjFunctor<T> functor_y(y_data, numel, y_data_conj);
+pten::funcs::ConjFunctor<T> functor_y(y_data, numel, y_data_conj);
for_range_y(functor_y);
x_data_deal = x_data_conj;
y_data_deal = y_data_conj;
6 changes: 3 additions & 3 deletions paddle/fluid/operators/cumprod_op.h
@@ -18,8 +18,8 @@
#include <type_traits>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/math/complex_functors.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/pten/kernels/funcs/complex_functors.h"

namespace paddle {
namespace operators {
@@ -124,12 +124,12 @@ class CumprodGradOpCPUKernel : public framework::OpKernel<T> {

platform::ForRange<platform::CPUDeviceContext> for_range_x(dev_ctx,
numel);
-math::ConjFunctor<T> functor_x(x_data, numel, x_data_conj);
+pten::funcs::ConjFunctor<T> functor_x(x_data, numel, x_data_conj);
for_range_x(functor_x);

platform::ForRange<platform::CPUDeviceContext> for_range_out(dev_ctx,
numel);
-math::ConjFunctor<T> functor_out(out_data, numel, out_data_conj);
+pten::funcs::ConjFunctor<T> functor_out(out_data, numel, out_data_conj);
for_range_out(functor_out);

x_data_deal = x_data_conj;
4 changes: 2 additions & 2 deletions paddle/fluid/operators/determinant_op.h
@@ -19,11 +19,11 @@
#include <cmath>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/complex_functors.h"
#include "paddle/fluid/operators/math/matrix_inverse.h"
#include "paddle/fluid/operators/svd_helper.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/pten/kernels/funcs/complex_functors.h"

namespace paddle {
namespace operators {
@@ -395,7 +395,7 @@ class SlogDeterminantGradKernel : public framework::OpKernel<T> {
size_t(numel * sizeof(T)));

platform::ForRange<DeviceContext> for_range(dev_ctx, numel);
-math::ConjFunctor<T> functor(inverse_A.data<T>(), numel, conj_data);
+pten::funcs::ConjFunctor<T> functor(inverse_A.data<T>(), numel, conj_data);
for_range(functor);

VLOG(3) << "inverse(A).conj() dims: " << conj_inverse_A.dims();
2 changes: 1 addition & 1 deletion paddle/fluid/operators/dot_op.h
@@ -16,8 +16,8 @@

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/math/complex_functors.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/pten/kernels/funcs/complex_functors.h"

// only can include the headers in paddle/pten/api dirs
#include "paddle/pten/api/lib/utils/tensor_utils.h"
42 changes: 22 additions & 20 deletions paddle/fluid/operators/eig_op.h
@@ -17,12 +17,12 @@
#include <math.h>
#include <algorithm>
#include <complex>
#include "paddle/fluid/operators/math/complex_functors.h"
#include "paddle/fluid/operators/math/lapack_function.h"
#include "paddle/fluid/operators/math/matrix_solve.h"
#include "paddle/fluid/operators/svd_helper.h"
#include "paddle/fluid/operators/transpose_op.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/pten/kernels/funcs/complex_functors.h"
#include "paddle/pten/kernels/funcs/math_function.h"
#define EPSILON 1e-6

@@ -87,18 +87,19 @@ void LapackEig(Tensor* input, Tensor* values, Tensor* vectors, int info,
int values_stride = values->dims()[values->dims().size() - 1];

Tensor rwork;
-math::Real<T>* rwork_data = nullptr;
+pten::funcs::Real<T>* rwork_data = nullptr;

rwork.Resize(framework::make_ddim({lda * 2}));
-rwork_data = rwork.mutable_data<math::Real<T>>(context.GetPlace());
+rwork_data = rwork.mutable_data<pten::funcs::Real<T>>(context.GetPlace());

// call lapackEig once to compute the size of work;
T computed_work_size;
-math::lapackEig<T, math::Real<T>>(
+math::lapackEig<T, pten::funcs::Real<T>>(
jobvl, jobvr, order, input_data, lda, values_data, lvector_data, ldvl,
rvector_data, ldvr, &computed_work_size, lwork, rwork_data, &info);

-lwork = std::max<int>(1, static_cast<int>(math::Real<T>(computed_work_size)));
+lwork = std::max<int>(
+1, static_cast<int>(pten::funcs::Real<T>(computed_work_size)));
Tensor work;
work.Resize(framework::make_ddim({lwork}));
T* work_data = work.mutable_data<T>(context.GetPlace());
@@ -108,7 +109,7 @@
T* current_values = &values_data[i * values_stride];
T* current_rvectors = &rvector_data[i * matrix_stride];

-math::lapackEig<T, math::Real<T>>(
+math::lapackEig<T, pten::funcs::Real<T>>(
jobvl, jobvr, order, current_matrix, lda, current_values, lvector_data,
ldvl, current_rvectors, ldvr, work_data, lwork, rwork_data, &info);
PADDLE_ENFORCE_EQ(
@@ -207,26 +208,27 @@ class EigKernel : public framework::OpKernel<T> {
origin_dim.push_back(last_item * 2);
framework::DDim big_dim = framework::make_ddim(origin_dim);

-real_values.mutable_data<math::Real<T>>(big_dim, context.GetPlace());
-real_vectors.mutable_data<math::Real<T>>(x->dims(), context.GetPlace());
+real_values.mutable_data<pten::funcs::Real<T>>(big_dim,
+context.GetPlace());
+real_vectors.mutable_data<pten::funcs::Real<T>>(x->dims(),
+context.GetPlace());

-ApplyEigKernel<DeviceContext, math::Real<T>>(*x, &real_values,
-&real_vectors, context);
-auto dito =
-math::DeviceIndependenceTensorOperations<DeviceContext, math::Real<T>,
-Tout>(context);
+ApplyEigKernel<DeviceContext, pten::funcs::Real<T>>(
+*x, &real_values, &real_vectors, context);
+auto dito = math::DeviceIndependenceTensorOperations<
+DeviceContext, pten::funcs::Real<T>, Tout>(context);

// 1. extract real part & imag part from real_values
Tensor real_part = dito.Slice(real_values, {-1}, {0}, {order});
Tensor imag_part = dito.Slice(real_values, {-1}, {order}, {order * 2});

// 2. construct complex values
-auto* real_part_data = real_part.data<math::Real<T>>();
-auto* imag_part_data = imag_part.data<math::Real<T>>();
+auto* real_part_data = real_part.data<pten::funcs::Real<T>>();
+auto* imag_part_data = imag_part.data<pten::funcs::Real<T>>();
int out_values_numel = out_values->numel();
platform::ForRange<DeviceContext> for_range(
context.template device_context<DeviceContext>(), out_values_numel);
-math::RealImagToComplexFunctor<Tout> functor(
+pten::funcs::RealImagToComplexFunctor<Tout> functor(
real_part_data, imag_part_data,
out_values->mutable_data<Tout>(context.GetPlace()), out_values_numel);
for_range(functor);
Expand All @@ -235,7 +237,7 @@ class EigKernel : public framework::OpKernel<T> {
Tensor real_vector_trans = dito.Transpose(real_vectors);
Tensor out_vectors_trans;
out_vectors_trans.mutable_data<Tout>(x->dims(), context.GetPlace());
-ConstructComplexVectors<math::Real<T>, Tout>(
+ConstructComplexVectors<pten::funcs::Real<T>, Tout>(
&out_vectors_trans, *out_values, real_vector_trans, context,
batch_count, order);
TransposeTwoAxis<DeviceContext, Tout>(out_vectors_trans, out_vectors,
@@ -271,14 +273,14 @@ void ComputeBackwardForComplexInput(
// turn diag_unsqueezed into complex
auto numel = diag_unsqueezed.numel();
Tensor diag_unsqueezed_complex;
-auto* data_diag_un = diag_unsqueezed.data<math::Real<Tout>>();
+auto* data_diag_un = diag_unsqueezed.data<pten::funcs::Real<Tout>>();
auto* data_diag_un_com = diag_unsqueezed_complex.mutable_data<Tout>(
diag_unsqueezed.dims(), context.GetPlace(),
static_cast<size_t>(numel * sizeof(Tout)));
auto& dev_ctx = context.template device_context<DeviceContext>();
platform::ForRange<DeviceContext> for_range(dev_ctx, numel);
-math::RealToComplexFunctor<Tout> functor(data_diag_un, data_diag_un_com,
-numel);
+pten::funcs::RealToComplexFunctor<Tout> functor(data_diag_un,
+data_diag_un_com, numel);
for_range(functor);
// real tensor multiply complex tensor in broadcast manner
Tensor res1 = dito.RealMulComplex(V, diag_unsqueezed_complex);
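The two `lapackEig` calls in `LapackEig` above follow the standard LAPACK workspace query: the first call only reports the optimal size of the work array in `computed_work_size`, and the second performs the actual decomposition with a buffer of that size. A sketch of the same pattern (assumptions: raw Fortran `sgeev` symbol from a linked LAPACK; Paddle instead goes through `math::lapackEig`, and error handling is elided here):

```cpp
#include <vector>

extern "C" void sgeev_(const char* jobvl, const char* jobvr, const int* n,
                       float* a, const int* lda, float* wr, float* wi,
                       float* vl, const int* ldvl, float* vr, const int* ldvr,
                       float* work, const int* lwork, int* info);

// Computes eigenvalues (wr, wi) and right eigenvectors (vr) of the
// n x n column-major matrix a, overwriting a in the process.
void EigWithWorkspaceQuery(float* a, int n, float* wr, float* wi, float* vr) {
  int lda = n, ldvl = 1, ldvr = n, info = 0;
  // First call: lwork = -1 asks sgeev to report the optimal workspace
  // size in work[0] without doing any computation.
  float work_query = 0.f;
  int lwork = -1;
  sgeev_("N", "V", &n, a, &lda, wr, wi, nullptr, &ldvl, vr, &ldvr,
         &work_query, &lwork, &info);
  // Second call: allocate the reported amount and do the real work.
  lwork = static_cast<int>(work_query);
  std::vector<float> work(static_cast<size_t>(lwork));
  sgeev_("N", "V", &n, a, &lda, wr, wi, nullptr, &ldvl, vr, &ldvr,
         work.data(), &lwork, &info);
}
```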