Skip to content

Commit

Permalink
add functions
Browse files Browse the repository at this point in the history
  • Loading branch information
TaoLv committed Apr 18, 2019
1 parent 495ce36 commit 672be6a
Show file tree
Hide file tree
Showing 2 changed files with 146 additions and 53 deletions.
151 changes: 122 additions & 29 deletions src/operator/mkl_functions-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,36 +44,31 @@ static bool check_type(const int t) {
return (t == mshadow::kFloat32 || t == mshadow::kFloat64);
}

// (replaced in this commit) Old tunable-kernel form: defines struct `name` whose
// per-call Map() forwards the whole array to the MKL VML routine
// vs<func> (float path) / vd<func> (double path).
// NOTE(review): the template Map() reinterpret_casts `src` to const float* for
// every DType, which is only valid when DType actually is float — verify before
// reusing this form.
#define MXNET_MKL_UNARY_MATH_FUNC(name, func) \
struct name : public mxnet_op::tunable { \
template <typename DType> \
MSHADOW_XINLINE static void Map(const index_t n, const DType *src, float *dst) { \
vs##func(static_cast<MKL_INT>(n), reinterpret_cast<const float *>(src), dst); \
} \
MSHADOW_XINLINE static void Map(const index_t n, const double *src, double *dst) { \
vd##func(static_cast<MKL_INT>(n), src, dst); \
} \
}
// Defines struct `name` exposing static Vectorize(n, src, dst) overloads that
// dispatch an entire length-n array to the MKL VML routine:
// vs<func> for float, vd<func> for double.
#define MXNET_MKL_UNARY_MATH_FUNC(name, func) \
struct name { \
MSHADOW_XINLINE static void Vectorize(const index_t n, const float *src, float *dst) { \
vs##func(static_cast<MKL_INT>(n), src, dst); \
} \
MSHADOW_XINLINE static void Vectorize(const index_t n, const double *src, double *dst) { \
vd##func(static_cast<MKL_INT>(n), src, dst); \
} \
};

// (replaced in this commit) Old tunable-kernel form of the binary wrapper:
// struct `name` with Map() overloads calling vs<func>/vd<func> on arrays a, b
// into c.
// NOTE(review): the template Map() reinterpret_casts `a` and `b` to
// const float* for every DType — only valid when DType is float; verify before
// reusing this form.
#define MXNET_MKL_BINARY_MATH_FUNC(name, func) \
struct name : public mxnet_op::tunable { \
template <typename DType> \
MSHADOW_XINLINE static void Map(const index_t n, \
const DType *a, \
const DType *b, \
float *c) { \
vs##func(static_cast<MKL_INT>(n), \
reinterpret_cast<const float *>(a), \
reinterpret_cast<const float *>(b), \
c); \
} \
MSHADOW_XINLINE static void Map(const index_t n, \
const double *a, \
const double *b, \
double *c) { \
vd##func(static_cast<MKL_INT>(n), a, b, c); \
} \
}
// Defines struct `name` exposing static Vectorize(n, a, b, c) overloads that
// dispatch elementwise c[i] = func(a[i], b[i]) over length-n arrays to the MKL
// VML routine: vs<func> for float, vd<func> for double.
#define MXNET_MKL_BINARY_MATH_FUNC(name, func) \
struct name { \
MSHADOW_XINLINE static void Vectorize(const index_t n, \
const float *a, \
const float *b, \
float *c) { \
vs##func(static_cast<MKL_INT>(n), a, b, c); \
} \
MSHADOW_XINLINE static void Vectorize(const index_t n, \
const double *a, \
const double *b, \
double *c) { \
vd##func(static_cast<MKL_INT>(n), a, b, c); \
} \
};

// Instantiate MKL-backed elementwise unary wrappers: (struct name, VML suffix).
MXNET_MKL_UNARY_MATH_FUNC(erf, Erf);
MXNET_MKL_UNARY_MATH_FUNC(exp, Exp);
Expand Down Expand Up @@ -118,6 +113,104 @@ MXNET_MKL_BINARY_MATH_FUNC(pow, Pow);
MXNET_MKL_BINARY_MATH_FUNC(hypot, Hypot);


// Elementwise scalar subtraction: dst[i] = in[i] - b for i in [0, n).
// `in` is read-only; `dst` may alias `in` for in-place operation.
template <typename DType>
MSHADOW_XINLINE static void sub_(index_t n, const DType *in, DType b, DType *dst) {
  for (index_t i = 0; i < n; i++)
    dst[i] = in[i] - b;
}

// Elementwise scalar division: dst[i] = in[i] / b for i in [0, n).
// `in` is read-only; `dst` may alias `in`. Caller guarantees b != 0.
template <typename DType>
MSHADOW_XINLINE static void div_(index_t n, const DType *in, DType b, DType *dst) {
  for (index_t i = 0; i < n; i++)
    dst[i] = in[i] / b;
}

// Sum reduction: dst[0] = in[0] + ... + in[n-1], accumulated in DType.
// Returns DType(0) for n == 0.
template <typename DType>
MSHADOW_XINLINE static void sum_(index_t n, const DType *in, DType *dst) {
  DType sum = DType(0);  // accumulate in DType, not float, to keep double precision
  for (index_t i = 0; i < n; i++)
    sum += in[i];

  dst[0] = sum;
}

// Max reduction: dst[0] = max(in[0..n-1]). Requires n >= 1.
// Uses index_t (not int) for n, matching the sibling helpers and avoiding
// narrowing at call sites that pass index_t.
template <typename DType>
MSHADOW_XINLINE static void max_(index_t n, const DType * __restrict__ in, DType *dst) {
  DType max_val = in[0];
  for (index_t i = 1; i < n; i++)
    max_val = (max_val < in[i]) ? in[i] : max_val;
  dst[0] = max_val;
}

// LayerNorm over the last dimension of a row-major (m, n) tensor:
// for each row i, out = (a - mean) / sqrt(var + eps) * gamma + beta.
//   a     : input,  m x n (read-only)
//   b     : output, m x n
//   ws    : scratch workspace, m x n (holds squared deviations)
//   gamma : per-feature scale, length n (read-only)
//   beta  : per-feature shift, length n (read-only)
//   mean  : out, per-row mean, length m
//   var   : out, per-row sqrt(biased variance + eps), length m
// Output/workspace pointers must not be const: they are written through.
template <typename DType>
MSHADOW_XINLINE static void LayerNormLastDim(const index_t m,
                                             const index_t n,
                                             const DType *a,
                                             DType *b,
                                             DType *ws,
                                             const DType *gamma,
                                             const DType *beta,
                                             DType *mean,
                                             DType *var,
                                             const DType eps) {
#pragma omp parallel for
  for (index_t i = 0; i < m; i++) {
    // const_cast: the scalar helpers (sub_/sum_/div_) take non-const input
    // pointers; the input row itself is never written.
    DType* in_offset = const_cast<DType *>(a) + i * n;
    DType* out_offset = b + i * n;
    DType* ws_offset = ws + i * n;

    sum_(n, in_offset, &(mean[i]));
    mean[i] /= n;
    sub_(n, in_offset, mean[i], out_offset);
    // MKL wrappers are structs; invoke their static Vectorize entry point.
    square::Vectorize(n, out_offset, ws_offset);
    sum_(n, ws_offset, &(var[i]));
    var[i] = sqrt(var[i] / n + eps);  // biased variance (divide by n), then stddev

    mul::Vectorize(n, out_offset, gamma, out_offset);
    div_(n, out_offset, var[i], out_offset);
    add::Vectorize(n, out_offset, beta, out_offset);
  }
}

// Softmax over the last dimension of a row-major (m, n) tensor:
// for each row i, out = exp(a) / sum(exp(a)).
//   a : input,  m x n (read-only)
//   b : output, m x n (must be non-const: written through)
// NOTE(review): no max-subtraction is performed, so large inputs can overflow
// exp(); use the LogSoftmaxLastDim-style shift if that matters here.
template <typename DType>
MSHADOW_XINLINE static void SoftmaxLastDim(const index_t m,
                                           const index_t n,
                                           const DType *a,
                                           DType *b) {
#pragma omp parallel for
  for (index_t i = 0; i < m; i++) {
    // const_cast: scalar helpers take non-const input; the row is only read.
    DType* in_offset = const_cast<DType *>(a) + i * n;
    DType* out_offset = b + i * n;

    // exp is an MKL wrapper struct; call its static Vectorize entry point.
    exp::Vectorize(n, in_offset, out_offset);
    DType sum = DType(0);  // DType, not float: keeps deduction and precision for double
    sum_(n, out_offset, &sum);
    div_(n, out_offset, sum, out_offset);
  }
}

// Log-softmax over the last dimension of a row-major (m, n) tensor, using the
// numerically stable shift: out = a - (max(a) + log(sum(exp(a - max(a))))).
//   a : input,  m x n (read-only)
//   b : output, m x n (must be non-const: written through)
template <typename DType>
MSHADOW_XINLINE static void LogSoftmaxLastDim(const index_t m,
                                              const index_t n,
                                              const DType *a,
                                              DType *b) {
#pragma omp parallel for
  for (index_t i = 0; i < m; i++) {
    // const_cast: scalar helpers take non-const input; the row is only read.
    DType* in_offset = const_cast<DType *>(a) + i * n;
    DType* out_offset = b + i * n;

    // row_max renamed from `b`, which shadowed the output parameter `b`.
    DType row_max, logsum;
    max_(n, in_offset, &row_max);
    sub_(n, in_offset, row_max, out_offset);
    // exp is an MKL wrapper struct; call its static Vectorize entry point.
    exp::Vectorize(n, out_offset, out_offset);
    sum_(n, out_offset, &logsum);
    logsum = row_max + log(logsum);  // log, not logf: keep full double precision
    sub_(n, in_offset, logsum, out_offset);
  }
}

} // namespace mkl_func
} // namespace op
} // namespace mxnet
Expand Down
48 changes: 24 additions & 24 deletions src/operator/tensor/elemwise_unary_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ class UnaryOp : public OpBase {
mkl_func::check_type(type_flag)) {
// set DType as float or double according to type_flag
MSHADOW_SGL_DBL_TYPE_SWITCH(type_flag, DType, {
MKL_OP::Map(input_size, inputs[0].dptr<DType>(), outputs[0].dptr<DType>());
MKL_OP::Vectorize(input_size, inputs[0].dptr<DType>(), outputs[0].dptr<DType>());
});
} else {
Compute<cpu, OP>(attrs, ctx, inputs, req, outputs);
Expand Down Expand Up @@ -562,7 +562,7 @@ struct ReshapeLikeParam : public dmlc::Parameter<ReshapeLikeParam> {
NNVM_REGISTER_OP(__name$) \
.set_num_inputs(1) \
.set_num_outputs(1) \
.set_attr<mxnet::FInferShape>("FInferShape", ElemwiseShape<1, 1>) \
.set_attr<mxnet::FInferShape>("FInferShape", ElemwiseShape<1, 1>) \
.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>) \
.set_attr<nnvm::FInplaceOption>("FInplaceOption", \
[](const NodeAttrs& attrs){ \
Expand All @@ -578,7 +578,7 @@ struct ReshapeLikeParam : public dmlc::Parameter<ReshapeLikeParam> {
NNVM_REGISTER_OP(__name$) \
.set_num_inputs(1) \
.set_num_outputs(1) \
.set_attr<mxnet::FInferShape>("FInferShape", ElemwiseShape<1, 1>) \
.set_attr<mxnet::FInferShape>("FInferShape", ElemwiseShape<1, 1>) \
.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>) \
.set_attr<nnvm::FInplaceOption>("FInplaceOption", \
[](const NodeAttrs& attrs){ \
Expand All @@ -591,27 +591,27 @@ struct ReshapeLikeParam : public dmlc::Parameter<ReshapeLikeParam> {
 * * This macro is used when MXNet is compiled with MKL, to accelerate the math function with MKL.
 * * Registers FCompute with UnaryOp::MKL_Compute() to complete the math function.
*/
// (reformatted in this commit) Registers a unary op with an MKL-accelerated
// dense kernel (FCompute) and a sparse kernel (FComputeEx), plus a sparse alias
// and storage-type inference.
#define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(__name$, __xpu$, __kernel$, __mkl_kernel$) \
MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$) \
MXNET_ADD_SPARSE_OP_ALIAS(__name$) \
.set_attr<FInferStorageType>("FInferStorageType", ElemwiseStorageType<1, 1, false, true, true>) \
.set_attr<FCompute>("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>) \
.set_attr<FComputeEx>("FComputeEx<" #__xpu$ ">", UnaryOp::MKL_ComputeEx<__kernel$, __mkl_kernel$>)

/*! \brief MKL Unary compute.
 * * This macro is used when MXNet is compiled with MKL, to accelerate the math function with MKL.
 * * Registers FCompute with UnaryOp::MKL_Compute() to complete the math function.
*/
// Registers a unary op with an MKL-accelerated dense kernel (FCompute) and a
// sparse kernel (FComputeEx), plus a sparse alias and storage-type inference.
// Fix: the FComputeEx line referenced the undefined parameter __mkl_kerbel$
// (typo for __mkl_kernel$), which breaks every expansion of this macro.
#define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_RSP(__name$, __xpu$, __kernel$, __mkl_kernel$) \
MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$) \
MXNET_ADD_SPARSE_OP_ALIAS(__name$) \
.set_attr<FInferStorageType>("FInferStorageType", ElemwiseStorageType<1, 1, false, true, false>) \
.set_attr<FCompute>("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>) \
.set_attr<FComputeEx>("FComputeEx<" #__xpu$ ">", UnaryOp::MKL_ComputeEx<__kernel$, __mkl_kernel$>)

// (reformatted in this commit) Registers a unary op with only the
// MKL-accelerated dense kernel (FCompute); no FComputeEx is attached.
#define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(__name$, __xpu$, __kernel$, __mkl_kernel$) \
MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$) \
.set_attr<FCompute>("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>)
// Registers a unary op with an MKL-accelerated dense kernel (FCompute) and a
// sparse kernel (FComputeEx), plus a sparse alias and storage-type inference.
#define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(__name$, __xpu$, __kernel$, __mkl_kernel$) \
MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$) \
MXNET_ADD_SPARSE_OP_ALIAS(__name$) \
.set_attr<FInferStorageType>("FInferStorageType", ElemwiseStorageType<1, 1, false, true, true>) \
.set_attr<FCompute>("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>) \
.set_attr<FComputeEx>("FComputeEx<" #__xpu$ ">", UnaryOp::MKL_ComputeEx<__kernel$, __mkl_kernel$>)

/*! \brief MKL Unary compute.
 * * This macro is used when MXNet is compiled with MKL, to accelerate the math function with MKL.
 * * Registers FCompute with UnaryOp::MKL_Compute() to complete the math function.
*/
// Registers a unary op with an MKL-accelerated dense kernel (FCompute) and a
// sparse kernel (FComputeEx), plus a sparse alias and storage-type inference.
// Fix: the FComputeEx line referenced the undefined parameter __mkl_kerbel$
// (typo for __mkl_kernel$), which breaks every expansion of this macro.
#define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_RSP(__name$, __xpu$, __kernel$, __mkl_kernel$) \
MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$) \
MXNET_ADD_SPARSE_OP_ALIAS(__name$) \
.set_attr<FInferStorageType>("FInferStorageType", ElemwiseStorageType<1, 1, false, true, false>)\
.set_attr<FCompute>("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>) \
.set_attr<FComputeEx>("FComputeEx<" #__xpu$ ">", UnaryOp::MKL_ComputeEx<__kernel$, __mkl_kernel$>)

// Registers a unary op with only the MKL-accelerated dense kernel (FCompute);
// no FComputeEx is attached.
#define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(__name$, __xpu$, __kernel$, __mkl_kernel$)\
MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$) \
.set_attr<FCompute>("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>)
#endif

/*! \brief Unary compute, with FComputeEx for csr and rsp available */
Expand Down

0 comments on commit 672be6a

Please sign in to comment.