Skip to content

Commit

Permalink
add functions
Browse files Browse the repository at this point in the history
  • Loading branch information
TaoLv committed Apr 18, 2019
1 parent 495ce36 commit 672be6a
Show file tree
Hide file tree
Showing 2 changed files with 146 additions and 53 deletions.
151 changes: 122 additions & 29 deletions src/operator/mkl_functions-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,36 +44,31 @@ static bool check_type(const int t) {
return (t == mshadow::kFloat32 || t == mshadow::kFloat64);
}

// (replaced in this commit) Old tunable-kernel form: defines struct `name` whose
// per-call Map() forwards the whole array to the MKL VML routine
// vs<func> (float path) / vd<func> (double path).
// NOTE(review): the template Map() reinterpret_casts `src` to const float* for
// every DType, which is only valid when DType actually is float — verify before
// reusing this form.
#define MXNET_MKL_UNARY_MATH_FUNC(name, func) \
struct name : public mxnet_op::tunable { \
template <typename DType> \
MSHADOW_XINLINE static void Map(const index_t n, const DType *src, float *dst) { \
vs##func(static_cast<MKL_INT>(n), reinterpret_cast<const float *>(src), dst); \
} \
MSHADOW_XINLINE static void Map(const index_t n, const double *src, double *dst) { \
vd##func(static_cast<MKL_INT>(n), src, dst); \
} \
}
// Defines struct `name` exposing static Vectorize(n, src, dst) overloads that
// dispatch an entire length-n array to the MKL VML routine:
// vs<func> for float, vd<func> for double.
#define MXNET_MKL_UNARY_MATH_FUNC(name, func) \
struct name { \
MSHADOW_XINLINE static void Vectorize(const index_t n, const float *src, float *dst) { \
vs##func(static_cast<MKL_INT>(n), src, dst); \
} \
MSHADOW_XINLINE static void Vectorize(const index_t n, const double *src, double *dst) { \
vd##func(static_cast<MKL_INT>(n), src, dst); \
} \
};

// (replaced in this commit) Old tunable-kernel form of the binary wrapper:
// struct `name` with Map() overloads calling vs<func>/vd<func> on arrays a, b
// into c.
// NOTE(review): the template Map() reinterpret_casts `a` and `b` to
// const float* for every DType — only valid when DType is float; verify before
// reusing this form.
#define MXNET_MKL_BINARY_MATH_FUNC(name, func) \
struct name : public mxnet_op::tunable { \
template <typename DType> \
MSHADOW_XINLINE static void Map(const index_t n, \
const DType *a, \
const DType *b, \
float *c) { \
vs##func(static_cast<MKL_INT>(n), \
reinterpret_cast<const float *>(a), \
reinterpret_cast<const float *>(b), \
c); \
} \
MSHADOW_XINLINE static void Map(const index_t n, \
const double *a, \
const double *b, \
double *c) { \
vd##func(static_cast<MKL_INT>(n), a, b, c); \
} \
}
// Defines struct `name` exposing static Vectorize(n, a, b, c) overloads that
// dispatch elementwise c[i] = func(a[i], b[i]) over length-n arrays to the MKL
// VML routine: vs<func> for float, vd<func> for double.
#define MXNET_MKL_BINARY_MATH_FUNC(name, func) \
struct name { \
MSHADOW_XINLINE static void Vectorize(const index_t n, \
const float *a, \
const float *b, \
float *c) { \
vs##func(static_cast<MKL_INT>(n), a, b, c); \
} \
MSHADOW_XINLINE static void Vectorize(const index_t n, \
const double *a, \
const double *b, \
double *c) { \
vd##func(static_cast<MKL_INT>(n), a, b, c); \
} \
};

// Instantiate MKL-backed elementwise unary wrappers: (struct name, VML suffix).
MXNET_MKL_UNARY_MATH_FUNC(erf, Erf);
MXNET_MKL_UNARY_MATH_FUNC(exp, Exp);
Expand Down Expand Up @@ -118,6 +113,104 @@ MXNET_MKL_BINARY_MATH_FUNC(pow, Pow);
MXNET_MKL_BINARY_MATH_FUNC(hypot, Hypot);


// Elementwise scalar subtraction: dst[i] = in[i] - b for i in [0, n).
// `in` is read-only; `dst` may alias `in` for in-place operation.
template <typename DType>
MSHADOW_XINLINE static void sub_(index_t n, const DType *in, DType b, DType *dst) {
  for (index_t i = 0; i < n; i++)
    dst[i] = in[i] - b;
}

// Elementwise scalar division: dst[i] = in[i] / b for i in [0, n).
// `in` is read-only; `dst` may alias `in`. Caller guarantees b != 0.
template <typename DType>
MSHADOW_XINLINE static void div_(index_t n, const DType *in, DType b, DType *dst) {
  for (index_t i = 0; i < n; i++)
    dst[i] = in[i] / b;
}

// Sum reduction: dst[0] = in[0] + ... + in[n-1], accumulated in DType.
// Returns DType(0) for n == 0.
template <typename DType>
MSHADOW_XINLINE static void sum_(index_t n, const DType *in, DType *dst) {
  DType sum = DType(0);  // accumulate in DType, not float, to keep double precision
  for (index_t i = 0; i < n; i++)
    sum += in[i];

  dst[0] = sum;
}

// Max reduction: dst[0] = max(in[0..n-1]). Requires n >= 1.
// Uses index_t (not int) for n, matching the sibling helpers and avoiding
// narrowing at call sites that pass index_t.
template <typename DType>
MSHADOW_XINLINE static void max_(index_t n, const DType * __restrict__ in, DType *dst) {
  DType max_val = in[0];
  for (index_t i = 1; i < n; i++)
    max_val = (max_val < in[i]) ? in[i] : max_val;
  dst[0] = max_val;
}

// LayerNorm over the last dimension of a row-major (m, n) tensor:
// for each row i, out = (a - mean) / sqrt(var + eps) * gamma + beta.
//   a     : input,  m x n (read-only)
//   b     : output, m x n
//   ws    : scratch workspace, m x n (holds squared deviations)
//   gamma : per-feature scale, length n (read-only)
//   beta  : per-feature shift, length n (read-only)
//   mean  : out, per-row mean, length m
//   var   : out, per-row sqrt(biased variance + eps), length m
// Output/workspace pointers must not be const: they are written through.
template <typename DType>
MSHADOW_XINLINE static void LayerNormLastDim(const index_t m,
                                             const index_t n,
                                             const DType *a,
                                             DType *b,
                                             DType *ws,
                                             const DType *gamma,
                                             const DType *beta,
                                             DType *mean,
                                             DType *var,
                                             const DType eps) {
#pragma omp parallel for
  for (index_t i = 0; i < m; i++) {
    // const_cast: the scalar helpers (sub_/sum_/div_) take non-const input
    // pointers; the input row itself is never written.
    DType* in_offset = const_cast<DType *>(a) + i * n;
    DType* out_offset = b + i * n;
    DType* ws_offset = ws + i * n;

    sum_(n, in_offset, &(mean[i]));
    mean[i] /= n;
    sub_(n, in_offset, mean[i], out_offset);
    // MKL wrappers are structs; invoke their static Vectorize entry point.
    square::Vectorize(n, out_offset, ws_offset);
    sum_(n, ws_offset, &(var[i]));
    var[i] = sqrt(var[i] / n + eps);  // biased variance (divide by n), then stddev

    mul::Vectorize(n, out_offset, gamma, out_offset);
    div_(n, out_offset, var[i], out_offset);
    add::Vectorize(n, out_offset, beta, out_offset);
  }
}

// Softmax over the last dimension of a row-major (m, n) tensor:
// for each row i, out = exp(a) / sum(exp(a)).
//   a : input,  m x n (read-only)
//   b : output, m x n (must be non-const: written through)
// NOTE(review): no max-subtraction is performed, so large inputs can overflow
// exp(); use the LogSoftmaxLastDim-style shift if that matters here.
template <typename DType>
MSHADOW_XINLINE static void SoftmaxLastDim(const index_t m,
                                           const index_t n,
                                           const DType *a,
                                           DType *b) {
#pragma omp parallel for
  for (index_t i = 0; i < m; i++) {
    // const_cast: scalar helpers take non-const input; the row is only read.
    DType* in_offset = const_cast<DType *>(a) + i * n;
    DType* out_offset = b + i * n;

    // exp is an MKL wrapper struct; call its static Vectorize entry point.
    exp::Vectorize(n, in_offset, out_offset);
    DType sum = DType(0);  // DType, not float: keeps deduction and precision for double
    sum_(n, out_offset, &sum);
    div_(n, out_offset, sum, out_offset);
  }
}

// Log-softmax over the last dimension of a row-major (m, n) tensor, using the
// numerically stable shift: out = a - (max(a) + log(sum(exp(a - max(a))))).
//   a : input,  m x n (read-only)
//   b : output, m x n (must be non-const: written through)
template <typename DType>
MSHADOW_XINLINE static void LogSoftmaxLastDim(const index_t m,
                                              const index_t n,
                                              const DType *a,
                                              DType *b) {
#pragma omp parallel for
  for (index_t i = 0; i < m; i++) {
    // const_cast: scalar helpers take non-const input; the row is only read.
    DType* in_offset = const_cast<DType *>(a) + i * n;
    DType* out_offset = b + i * n;

    // row_max renamed from `b`, which shadowed the output parameter `b`.
    DType row_max, logsum;
    max_(n, in_offset, &row_max);
    sub_(n, in_offset, row_max, out_offset);
    // exp is an MKL wrapper struct; call its static Vectorize entry point.
    exp::Vectorize(n, out_offset, out_offset);
    sum_(n, out_offset, &logsum);
    logsum = row_max + log(logsum);  // log, not logf: keep full double precision
    sub_(n, in_offset, logsum, out_offset);
  }
}

} // namespace mkl_func
} // namespace op
} // namespace mxnet
Expand Down
48 changes: 24 additions & 24 deletions src/operator/tensor/elemwise_unary_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ class UnaryOp : public OpBase {
mkl_func::check_type(type_flag)) {
// set DType as float or double according to type_flag
MSHADOW_SGL_DBL_TYPE_SWITCH(type_flag, DType, {
MKL_OP::Map(input_size, inputs[0].dptr<DType>(), outputs[0].dptr<DType>());
MKL_OP::Vectorize(input_size, inputs[0].dptr<DType>(), outputs[0].dptr<DType>());
});
} else {
Compute<cpu, OP>(attrs, ctx, inputs, req, outputs);
Expand Down Expand Up @@ -562,7 +562,7 @@ struct ReshapeLikeParam : public dmlc::Parameter<ReshapeLikeParam> {
NNVM_REGISTER_OP(__name$) \
.set_num_inputs(1) \
.set_num_outputs(1) \
.set_attr<mxnet::FInferShape>("FInferShape", ElemwiseShape<1, 1>) \
.set_attr<mxnet::FInferShape>("FInferShape", ElemwiseShape<1, 1>) \
.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>) \
.set_attr<nnvm::FInplaceOption>("FInplaceOption", \
[](const NodeAttrs& attrs){ \
Expand All @@ -578,7 +578,7 @@ struct ReshapeLikeParam : public dmlc::Parameter<ReshapeLikeParam> {
NNVM_REGISTER_OP(__name$) \
.set_num_inputs(1) \
.set_num_outputs(1) \
.set_attr<mxnet::FInferShape>("FInferShape", ElemwiseShape<1, 1>) \
.set_attr<mxnet::FInferShape>("FInferShape", ElemwiseShape<1, 1>) \
.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>) \
.set_attr<nnvm::FInplaceOption>("FInplaceOption", \
[](const NodeAttrs& attrs){ \
Expand All @@ -591,27 +591,27 @@ struct ReshapeLikeParam : public dmlc::Parameter<ReshapeLikeParam> {
 * * This macro is used when MXNet is compiled with MKL, to accelerate the math function with MKL.
 * * Registers FCompute with UnaryOp::MKL_Compute() to complete the math function.
*/
// (reformatted in this commit) Registers a unary op with an MKL-accelerated
// dense kernel (FCompute) and a sparse kernel (FComputeEx), plus a sparse alias
// and storage-type inference.
#define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(__name$, __xpu$, __kernel$, __mkl_kernel$) \
MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$) \
MXNET_ADD_SPARSE_OP_ALIAS(__name$) \
.set_attr<FInferStorageType>("FInferStorageType", ElemwiseStorageType<1, 1, false, true, true>) \
.set_attr<FCompute>("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>) \
.set_attr<FComputeEx>("FComputeEx<" #__xpu$ ">", UnaryOp::MKL_ComputeEx<__kernel$, __mkl_kernel$>)

/*! \brief MKL Unary compute.
 * * This macro is used when MXNet is compiled with MKL, to accelerate the math function with MKL.
 * * Registers FCompute with UnaryOp::MKL_Compute() to complete the math function.
*/
// Registers a unary op with an MKL-accelerated dense kernel (FCompute) and a
// sparse kernel (FComputeEx), plus a sparse alias and storage-type inference.
// Fix: the FComputeEx line referenced the undefined parameter __mkl_kerbel$
// (typo for __mkl_kernel$), which breaks every expansion of this macro.
#define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_RSP(__name$, __xpu$, __kernel$, __mkl_kernel$) \
MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$) \
MXNET_ADD_SPARSE_OP_ALIAS(__name$) \
.set_attr<FInferStorageType>("FInferStorageType", ElemwiseStorageType<1, 1, false, true, false>) \
.set_attr<FCompute>("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>) \
.set_attr<FComputeEx>("FComputeEx<" #__xpu$ ">", UnaryOp::MKL_ComputeEx<__kernel$, __mkl_kernel$>)

// (reformatted in this commit) Registers a unary op with only the
// MKL-accelerated dense kernel (FCompute); no FComputeEx is attached.
#define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(__name$, __xpu$, __kernel$, __mkl_kernel$) \
MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$) \
.set_attr<FCompute>("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>)
// Registers a unary op with an MKL-accelerated dense kernel (FCompute) and a
// sparse kernel (FComputeEx), plus a sparse alias and storage-type inference.
#define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(__name$, __xpu$, __kernel$, __mkl_kernel$) \
MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$) \
MXNET_ADD_SPARSE_OP_ALIAS(__name$) \
.set_attr<FInferStorageType>("FInferStorageType", ElemwiseStorageType<1, 1, false, true, true>) \
.set_attr<FCompute>("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>) \
.set_attr<FComputeEx>("FComputeEx<" #__xpu$ ">", UnaryOp::MKL_ComputeEx<__kernel$, __mkl_kernel$>)

/*! \brief MKL Unary compute.
 * * This macro is used when MXNet is compiled with MKL, to accelerate the math function with MKL.
 * * Registers FCompute with UnaryOp::MKL_Compute() to complete the math function.
*/
// Registers a unary op with an MKL-accelerated dense kernel (FCompute) and a
// sparse kernel (FComputeEx), plus a sparse alias and storage-type inference.
// Fix: the FComputeEx line referenced the undefined parameter __mkl_kerbel$
// (typo for __mkl_kernel$), which breaks every expansion of this macro.
#define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_RSP(__name$, __xpu$, __kernel$, __mkl_kernel$) \
MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$) \
MXNET_ADD_SPARSE_OP_ALIAS(__name$) \
.set_attr<FInferStorageType>("FInferStorageType", ElemwiseStorageType<1, 1, false, true, false>)\
.set_attr<FCompute>("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>) \
.set_attr<FComputeEx>("FComputeEx<" #__xpu$ ">", UnaryOp::MKL_ComputeEx<__kernel$, __mkl_kernel$>)

// Registers a unary op with only the MKL-accelerated dense kernel (FCompute);
// no FComputeEx is attached.
#define MXNET_MKL_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(__name$, __xpu$, __kernel$, __mkl_kernel$)\
MXNET_MKL_OPERATOR_REGISTER_UNARY(__name$) \
.set_attr<FCompute>("FCompute<" #__xpu$ ">", UnaryOp::MKL_Compute<__kernel$, __mkl_kernel$>)
#endif

/*! \brief Unary compute, with FComputeEx for csr and rsp available */
Expand Down

0 comments on commit 672be6a

Please sign in to comment.