From 672be6a715b59a6c01f745a039d758c8ecf325fe Mon Sep 17 00:00:00 2001
From: Tao Lv
Date: Thu, 18 Apr 2019 14:39:26 +0800
Subject: [PATCH] add functions

---
 src/operator/mkl_functions-inl.h        | 151 +++++++++++++++++++-----
 src/operator/tensor/elemwise_unary_op.h |  48 ++++----
 2 files changed, 146 insertions(+), 53 deletions(-)

diff --git a/src/operator/mkl_functions-inl.h b/src/operator/mkl_functions-inl.h
index f3615f4ad17e..b224d08ab126 100644
--- a/src/operator/mkl_functions-inl.h
+++ b/src/operator/mkl_functions-inl.h
@@ -44,36 +44,31 @@ static bool check_type(const int t) {
   return (t == mshadow::kFloat32 || t == mshadow::kFloat64);
 }
 
-#define MXNET_MKL_UNARY_MATH_FUNC(name, func)                                           \
-  struct name : public mxnet_op::tunable {                                              \
-    template <typename DType>                                                           \
-    MSHADOW_XINLINE static void Map(const index_t n, const DType *src, float *dst) {    \
-      vs##func(static_cast<MKL_INT>(n), reinterpret_cast<const float *>(src), dst);     \
-    }                                                                                   \
-    MSHADOW_XINLINE static void Map(const index_t n, const double *src, double *dst) {  \
-      vd##func(static_cast<MKL_INT>(n), src, dst);                                      \
-    }                                                                                   \
-  }
+#define MXNET_MKL_UNARY_MATH_FUNC(name, func)                                              \
+struct name {                                                                              \
+  MSHADOW_XINLINE static void Vectorize(const index_t n, const float *src, float *dst) {   \
+    vs##func(static_cast<MKL_INT>(n), src, dst);                                           \
+  }                                                                                        \
+  MSHADOW_XINLINE static void Vectorize(const index_t n, const double *src, double *dst) { \
+    vd##func(static_cast<MKL_INT>(n), src, dst);                                           \
+  }                                                                                        \
+};
 
-#define MXNET_MKL_BINARY_MATH_FUNC(name, func)        \
-  struct name : public mxnet_op::tunable {            \
-    template <typename DType>                         \
-    MSHADOW_XINLINE static void Map(const index_t n,  \
-                                    const DType *a,   \
-                                    const DType *b,   \
-                                    float *c) {       \
-      vs##func(static_cast<MKL_INT>(n),               \
-               reinterpret_cast<const float *>(a),    \
-               reinterpret_cast<const float *>(b),    \
-               c);                                    \
-    }                                                 \
-    MSHADOW_XINLINE static void Map(const index_t n,  \
-                                    const double *a,  \
-                                    const double *b,  \
-                                    double *c) {      \
-      vd##func(static_cast<MKL_INT>(n), a, b, c);     \
-    }                                                 \
-  }
+#define MXNET_MKL_BINARY_MATH_FUNC(name, func)           \
+struct name {                                            \
+  MSHADOW_XINLINE static void Vectorize(const index_t n, \
+                                        const float *a,  \
+                                        const float *b,  \
+                                        float *c) {      \
+    vs##func(static_cast<MKL_INT>(n), a, b, c);          \
+  }                                                      \
+  MSHADOW_XINLINE static void Vectorize(const index_t n, \
+                                        const double *a, \
+                                        const double *b, \
+                                        double *c) {     \
+    vd##func(static_cast<MKL_INT>(n), a, b, c);          \
+  }                                                      \
+};
 
 MXNET_MKL_UNARY_MATH_FUNC(erf, Erf);
 MXNET_MKL_UNARY_MATH_FUNC(exp, Exp);
@@ -118,6 +113,104 @@ MXNET_MKL_BINARY_MATH_FUNC(pow, Pow);
 
 MXNET_MKL_BINARY_MATH_FUNC(hypot, Hypot);
 
+// Scalar helpers used by the fused row-wise kernels below.
+// NOTE(review): inputs are taken as `const DType *` so the kernels can pass
+// pointers derived from their const input tensors.
+template <typename DType>
+MSHADOW_XINLINE static void sub_(index_t n, const DType *in, DType b, DType *dst) {
+  for (index_t i = 0; i < n; i++)
+    dst[i] = in[i] - b;
+}
+
+template <typename DType>
+MSHADOW_XINLINE static void div_(index_t n, const DType *in, DType b, DType *dst) {
+  for (index_t i = 0; i < n; i++)
+    dst[i] = in[i] / b;
+}
+
+template <typename DType>
+MSHADOW_XINLINE static void sum_(index_t n, const DType *in, DType *dst) {
+  // dst[0] = cblas_sasum(n, in, 1);
+  DType sum = 0.0f;
+  for (index_t i = 0; i < n; i++)
+    sum += in[i];
+
+  dst[0] = sum;
+}
+
+// `index_t n` (not `int`): every caller passes an index_t length.
+template <typename DType>
+MSHADOW_XINLINE static void max_(index_t n, const DType * __restrict__ in, DType *dst) {
+  dst[0] = in[0];
+  for (index_t i = 1; i < n; i++)
+    dst[0] = (dst[0] < in[i]) ? in[i] : dst[0];
+}
+
+// LayerNorm on the last dimension.
+// a: input (m x n); b: normalized output; ws: scratch (m x n);
+// gamma/beta: per-column scale/shift (length n); mean/var: per-row stats (length m).
+// Outputs (b, ws, mean, var) must be non-const: they are written through.
+template <typename DType>
+MSHADOW_XINLINE static void LayerNormLastDim(const index_t m,
+                                             const index_t n,
+                                             const DType *a,
+                                             DType *b,
+                                             DType *ws,
+                                             const DType *gamma,
+                                             const DType *beta,
+                                             DType *mean,
+                                             DType *var,
+                                             const DType eps) {
+#pragma omp parallel for
+  for (index_t i = 0; i < m; i++) {
+    const DType *in_offset = a + i * n;
+    DType *out_offset = b + i * n;
+    DType *ws_offset = ws + i * n;
+
+    sum_(n, in_offset, &(mean[i]));
+    mean[i] /= n;
+    sub_(n, in_offset, mean[i], out_offset);
+    // square/mul/add are the MKL wrapper structs above: call their Vectorize().
+    square::Vectorize(n, out_offset, ws_offset);
+    sum_(n, ws_offset, &(var[i]));
+    var[i] = sqrt(var[i] / n + eps);
+
+    mul::Vectorize(n, out_offset, gamma, out_offset);
+    div_(n, out_offset, var[i], out_offset);
+    add::Vectorize(n, out_offset, beta, out_offset);
+  }
+}
+
+// softmax on the last dimension (b = output, written through).
+template <typename DType>
+MSHADOW_XINLINE static void SoftmaxLastDim(const index_t m,
+                                           const index_t n,
+                                           const DType *a,
+                                           DType *b) {
+#pragma omp parallel for
+  for (index_t i = 0; i < m; i++) {
+    const DType *in_offset = a + i * n;
+    DType *out_offset = b + i * n;
+
+    exp::Vectorize(n, in_offset, out_offset);
+    DType sum = 0.0f;  // DType, not float: sum_() needs DType* for double too
+    sum_(n, out_offset, &sum);
+    div_(n, out_offset, sum, out_offset);
+  }
+}
+
+// log-softmax on the last dimension, max-shifted for numerical stability.
+template <typename DType>
+MSHADOW_XINLINE static void LogSoftmaxLastDim(const index_t m,
+                                              const index_t n,
+                                              const DType *a,
+                                              DType *b) {
+#pragma omp parallel for
+  for (index_t i = 0; i < m; i++) {
+    const DType *in_offset = a + i * n;
+    DType *out_offset = b + i * n;
+
+    // b_max (renamed: must not shadow the output parameter `b`)
+    DType b_max, logsum;
+    max_(n, in_offset, &b_max);
+    sub_(n, in_offset, b_max, out_offset);
+    exp::Vectorize(n, out_offset, out_offset);
+    sum_(n, out_offset, &logsum);
+    logsum = b_max + log(logsum);  // log, not logf: keep double precision
+    sub_(n, in_offset, logsum, out_offset);
+  }
+}
+
 }  // namespace mkl_func
 }  // namespace op
 }  // namespace mxnet
diff --git a/src/operator/tensor/elemwise_unary_op.h b/src/operator/tensor/elemwise_unary_op.h
index 600803c953f6..d2d221bbd628 100644
--- a/src/operator/tensor/elemwise_unary_op.h
+++ b/src/operator/tensor/elemwise_unary_op.h
@@ -390,7 +390,7 @@ class UnaryOp : public OpBase {
         mkl_func::check_type(type_flag)) {
       // set DType as float or double according to type_flag
       MSHADOW_SGL_DBL_TYPE_SWITCH(type_flag, DType, {
-        MKL_OP::Map(input_size, inputs[0].dptr<DType>(), outputs[0].dptr<DType>());
+        MKL_OP::Vectorize(input_size, inputs[0].dptr<DType>(), outputs[0].dptr<DType>());
       });
     } else {
       Compute<xpu, OP>(attrs, ctx, inputs, req, outputs);
@@ -562,7 +562,7 @@ struct ReshapeLikeParam : public dmlc::Parameter<ReshapeLikeParam> {
   NNVM_REGISTER_OP(__name$)                                        \
   .set_num_inputs(1)                                               \
   .set_num_outputs(1)                                              \
-  .set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>) \
+  .set_attr<mxnet::FInferShape>("FInferShape", ElemwiseShape<1, 1>)\
   .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)    \
   .set_attr<nnvm::FInplaceOption>("FInplaceOption",                \
     [](const NodeAttrs& attrs){                                    \
@@ -578,7 +578,7 @@ struct ReshapeLikeParam : public dmlc::Parameter<ReshapeLikeParam> {
   NNVM_REGISTER_OP(__name$)                                        \
   .set_num_inputs(1)                                               \
   .set_num_outputs(1)                                              \
-  .set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>) \
+  .set_attr<mxnet::FInferShape>("FInferShape", ElemwiseShape<1, 1>)\
   .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)    \
   .set_attr<nnvm::FInplaceOption>("FInplaceOption",                \
     [](const NodeAttrs& attrs){                                    \
@@ -591,27 +591,27 @@ struct ReshapeLikeParam : public dmlc::Parameter<ReshapeLikeParam> {
  * * With this macro means mxnet compile with MKL to accelerate math function with mkl.
  * * Will Register FCompute with UnaryOp::MKL_Compute() to compelet the math function.
  */
- #define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(__name$, __xpu$, __kernel$, __mkl_kernel$) \
- MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$)                                                        \
- MXNET_ADD_SPARSE_OP_ALIAS(__name$)                                                                \
- .set_attr<FInferStorageType>("FInferStorageType", ElemwiseStorageType<1, 1, false, true, true>)   \
- .set_attr<FCompute>("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>)      \
- .set_attr<FComputeEx>("FComputeEx<" #__xpu$ ">", UnaryOp::MKL_ComputeEx<__kernel$, __mkl_kernel$>)
-
- /*! \bried MKL Unary compute.
- * * With this macro means mxnet compile with MKL to accelerate math function with mkl.
- * * Will Register FCompute with UnaryOp::MKL_Compute() to compelet the math function.
- */
- #define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_RSP(__name$, __xpu$, __kernel$, __mkl_kernel$)     \
- MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$)                                                        \
- MXNET_ADD_SPARSE_OP_ALIAS(__name$)                                                                \
- .set_attr<FInferStorageType>("FInferStorageType", ElemwiseStorageType<1, 1, false, true, false>)  \
- .set_attr<FCompute>("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>)      \
- .set_attr<FComputeEx>("FComputeEx<" #__xpu$ ">", UnaryOp::MKL_ComputeEx<__kernel$, __mkl_kerbel$>)
-
- #define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(__name$, __xpu$, __kernel$, __mkl_kernel$) \
- MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$)                                                          \
- .set_attr<FCompute>("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>)
+#define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(__name$, __xpu$, __kernel$, __mkl_kernel$) \
+  MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$)                                                      \
+  MXNET_ADD_SPARSE_OP_ALIAS(__name$)                                                              \
+  .set_attr<FInferStorageType>("FInferStorageType", ElemwiseStorageType<1, 1, false, true, true>) \
+  .set_attr<FCompute>("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>)    \
+  .set_attr<FComputeEx>("FComputeEx<" #__xpu$ ">", UnaryOp::MKL_ComputeEx<__kernel$, __mkl_kernel$>)
+
+/*! \brief MKL Unary compute.
+ *  With this macro means mxnet compile with MKL to accelerate math function with mkl.
+ *  Will Register FCompute with UnaryOp::MKL_Compute() to complete the math function.
+*/
+#define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_RSP(__name$, __xpu$, __kernel$, __mkl_kernel$) \
+  MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$)                                                  \
+  MXNET_ADD_SPARSE_OP_ALIAS(__name$)                                                          \
+  .set_attr<FInferStorageType>("FInferStorageType", ElemwiseStorageType<1, 1, false, true, false>)\
+  .set_attr<FCompute>("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>) \
+  .set_attr<FComputeEx>("FComputeEx<" #__xpu$ ">", UnaryOp::MKL_ComputeEx<__kernel$, __mkl_kernel$>)
+
+#define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(__name$, __xpu$, __kernel$, __mkl_kernel$)\
+  MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$)                                                       \
+  .set_attr<FCompute>("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>)
 #endif
 
 /*! \brief Unary compute, with FComputeEx for csr and rsp available */