diff --git a/src/operator/mkl/mkl_batch_norm-inl.h b/src/operator/mkl/mkl_batch_norm-inl.h deleted file mode 100644 index b5967f4de294..000000000000 --- a/src/operator/mkl/mkl_batch_norm-inl.h +++ /dev/null @@ -1,391 +0,0 @@ -/******************************************************************************* -* Copyright 2016 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -* -* \file mkl_batch_norm-inl.h -* \brief -* \author lingyan.guo@intel.com -* zhenlin.luo@intel.com -* -*******************************************************************************/ -#ifndef MXNET_OPERATOR_MKL_MKL_BATCH_NORM_INL_H_ -#define MXNET_OPERATOR_MKL_MKL_BATCH_NORM_INL_H_ -#include -#include -#include -#include -#include -#include -#include -#include -#include "../operator_common.h" -#include "../mshadow_op.h" -#include "./mkl_util-inl.h" - -namespace mxnet { -namespace op { - -template -class MKLBatchNormOp : public Operator { - public: - explicit MKLBatchNormOp(BatchNormParam param) { - this->param_ = param; - fwd_top_data = MKLData::create(); - fwd_bottom_data = MKLData::create(); - bwd_top_diff = MKLData::create(); - bwd_bottom_diff = MKLData::create(); - scaleShift_space.dptr = NULL; - scaleShiftDiff_space.dptr = NULL; - } - virtual ~MKLBatchNormOp() { - if (batchNormFwdInference != NULL) dnnDelete(batchNormFwdInference); - if (batchNormFwdTraining != NULL) dnnDelete(batchNormFwdTraining); - if (batchNormBwdScaleShift != NULL) dnnDelete(batchNormBwdScaleShift); - dnnLayoutDelete(layout_usr_); - if (scaleShift_space.dptr) - Storage::Get()->Free(scaleShift_space); - if (scaleShiftDiff_space.dptr) - Storage::Get()->Free(scaleShiftDiff_space); - } - static std::string getName() { - return "MKLBatchNormOp"; - } - - private: - void LayerSetUp(const mshadow::Tensor &data, - const mshadow::Tensor &out) { - eps_ = param_.eps; - size_t dim = 4, sizes[4], strides[4]; - channels_ = data.shape_[1]; - height_ = data.shape_[2]; - width_ = data.shape_[3]; - num_ = data.shape_[0]; - - sizes[0] = width_; - sizes[1] = height_; - sizes[2] = channels_; - sizes[3] = num_; - - strides[0] = 1; - strides[1] = sizes[0]; - strides[2] = sizes[0] * sizes[1]; - strides[3] = sizes[0] * sizes[1] * sizes[2]; - - // Names are for debugging only - fwd_bottom_data->name = "fwd_bottom_data @ " + getName(); - fwd_top_data->name = "fwd_top_data @ " + getName(); - bwd_bottom_diff->name = "bwd_bottom_diff @ " + getName(); - bwd_top_diff->name = "bwd_top_diff @ " + getName(); - - dnnError_t e; - e = dnnLayoutCreate(&layout_usr_, dim, sizes, strides); - CHECK_EQ(e, E_SUCCESS); - - fwd_bottom_data->create_user_layout(dim, sizes, strides); - fwd_top_data->create_user_layout(dim, sizes, strides); - bwd_bottom_diff->create_user_layout(dim, sizes, strides); - bwd_top_diff->create_user_layout(dim, sizes, strides); - - // Primitives will be allocated during the first fwd pass - batchNormFwdInference = NULL; - batchNormFwdTraining = NULL; - batchNormBwdScaleShift = NULL; - int scaleShift_size = channels_*2*sizeof(DType); - scaleShift_space = Storage::Get()->Alloc(scaleShift_size, Context::CPU()); - scaleShiftDiff_space = Storage::Get()->Alloc(scaleShift_size, Context::CPU()); - DType * scaleShift_buf = reinterpret_cast(scaleShift_space.dptr); - /*!use_weight_bias_*/ - for (int i = 0; i < channels_; i++) { - scaleShift_buf[i] = 1.0; - scaleShift_buf[channels_ + i] = 0; - } - } - - public: - virtual void Forward(const OpContext &ctx, - const std::vector &in_data, - const std::vector &req, - const std::vector &out_data, - const std::vector &aux_states) { - using namespace mshadow; - using namespace mshadow::expr; - CHECK_EQ(in_data.size(), 3); - CHECK_EQ(aux_states.size(), 2); - if (ctx.is_train) { - CHECK_EQ(out_data.size(), 3); - CHECK_EQ(req.size(), 3); - } else { - CHECK_GE(out_data.size(), 1); - CHECK_GE(req.size(), 1); - CHECK_EQ(req[batchnorm::kOut], kWriteTo); - } - - Stream *s = ctx.get_stream(); - Tensor data; - Tensor out; - if (in_data[batchnorm::kData].ndim() == 2) { - Shape<4> dshape = Shape4(in_data[batchnorm::kData].shape_[0], - in_data[batchnorm::kData].shape_[1], 1, 1); - data = mkl_experimental_direct_get_with_shape( - in_data[batchnorm::kData], dshape, s); - out = mkl_experimental_direct_get_with_shape( - out_data[batchnorm::kOut], dshape, s); - } else { - data = mkl_experimental_direct_get(in_data[batchnorm::kData], s); - out = mkl_experimental_direct_get(out_data[batchnorm::kOut], s); - } - - // const real_t scale = static_cast(in_data[batchnorm::kData].shape_[1]) / - // static_cast(in_data[batchnorm::kData].shape_.Size()); - - Tensor slope = in_data[batchnorm::kGamma].get(s); - Tensor bias = in_data[batchnorm::kBeta].get(s); - Tensor moving_mean = aux_states[batchnorm::kMovingMean].get(s); - Tensor moving_var = aux_states[batchnorm::kMovingVar].get(s); - - if (param_.fix_gamma) - slope = 1.f; - - dnnError_t e; - if (!init_mkldnn_) { - LayerSetUp(data, out); - init_mkldnn_ = true; - } - void* bottom_data = NULL; -#if MKL_EXPERIMENTAL == 1 - bottom_data = - reinterpret_cast(mkl_prv_data(in_data[batchnorm::kData])); -#endif - int bwd_flags = dnnUseScaleShift; - if (param_.use_global_stats) - bwd_flags = dnnUseScaleShift | dnnUseInputMeanVariance; -#if MKL_EXPERIMENTAL == 1 - if (NULL != bottom_data) { - // Is it the first pass? Create a primitive. - if (batchNormFwdInference == NULL) { - std::shared_ptr bottom_data_mem = in_data[batchnorm::kData].Mkl_mem_; - std::shared_ptr bottom_prv_desc = bottom_data_mem->get_prv_descriptor(); - CHECK(bottom_prv_desc->get_descr_type() == PrvMemDescr::PRV_DESCR_MKL2017); - std::shared_ptr > mem_descr - = std::static_pointer_cast>(bottom_prv_desc); - CHECK(mem_descr != NULL); - fwd_bottom_data = mem_descr; - - e = dnnBatchNormalizationCreateForward_v2( - &batchNormFwdInference, NULL, mem_descr->layout_int, eps_, - dnnUseInputMeanVariance | dnnUseScaleShift); - CHECK_EQ(e, E_SUCCESS); - - e = dnnBatchNormalizationCreateForward_v2( - &batchNormFwdTraining, NULL, mem_descr->layout_int, eps_, - dnnUseScaleShift); - CHECK_EQ(e, E_SUCCESS); - - fwd_top_data->create_internal_layout(batchNormFwdInference, dnnResourceDst); - bwd_top_diff->create_internal_layout(batchNormFwdInference, dnnResourceDst); - bwd_bottom_diff->create_internal_layout(batchNormFwdInference, dnnResourceSrc); - - e = dnnBatchNormalizationCreateBackward_v2( - &batchNormBwdScaleShift, NULL, mem_descr->layout_int, eps_, bwd_flags); - CHECK_EQ(e, E_SUCCESS); - } - } -#endif - if (NULL == bottom_data) { - if (batchNormFwdInference == NULL) { - e = dnnBatchNormalizationCreateForward_v2( - &batchNormFwdInference, NULL, layout_usr_, eps_, - dnnUseInputMeanVariance | dnnUseScaleShift); - CHECK_EQ(e, E_SUCCESS); - - e = dnnBatchNormalizationCreateForward_v2( - &batchNormFwdTraining, NULL, layout_usr_, eps_, dnnUseScaleShift); - CHECK_EQ(e, E_SUCCESS); - - e = dnnBatchNormalizationCreateBackward_v2( - &batchNormBwdScaleShift, NULL, layout_usr_, eps_, bwd_flags); - CHECK_EQ(e, E_SUCCESS); - } - bottom_data = reinterpret_cast(data.dptr_); - } - - DType * scaleShift_buf = reinterpret_cast(scaleShift_space.dptr); - // use_weight_bias_ - for (int i = 0; i < channels_; i++) { - scaleShift_buf[i] = (slope.dptr_)[i]; - } - for (int i = 0; i < channels_; i++) { - scaleShift_buf[channels_ + i] = (bias.dptr_)[i]; - } - - void* BatchNorm_res[dnnResourceNumber]; - BatchNorm_res[dnnResourceSrc] = bottom_data; - BatchNorm_res[dnnResourceScaleShift] = scaleShift_space.dptr; - - BatchNorm_res[dnnResourceDst] = fwd_top_data->get_output_ptr(out.dptr_, - fwd_top_data, out_data[batchnorm::kOut]); - if (ctx.is_train && !param_.use_global_stats) { - Tensor mean = out_data[batchnorm::kMean].get(s); - Tensor var = out_data[batchnorm::kVar].get(s); - CHECK(req[batchnorm::kMean] == kNullOp || req[batchnorm::kMean] == kWriteTo); - CHECK(req[batchnorm::kVar] == kNullOp || req[batchnorm::kVar] == kWriteTo); - BatchNorm_res[dnnResourceMean] = mean.dptr_; - BatchNorm_res[dnnResourceVariance] = var.dptr_; - e = dnnExecute(batchNormFwdTraining, BatchNorm_res); - CHECK_EQ(e, E_SUCCESS); - } else { - BatchNorm_res[dnnResourceMean] = moving_mean.dptr_; - BatchNorm_res[dnnResourceVariance] = moving_var.dptr_; - e = dnnExecute(batchNormFwdInference, BatchNorm_res); - CHECK_EQ(e, E_SUCCESS); - } - -#if MKL_EXPERIMENTAL == 0 - if (fwd_top_data->conversion_needed()) { - fwd_top_data->convert_from_prv(out.dptr_); - } -#endif - } - - virtual void Backward(const OpContext &ctx, - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &req, - const std::vector &in_grad, - const std::vector &aux_states) { - using namespace mshadow; - using namespace mshadow::expr; - CHECK_EQ(out_grad.size(), 1); - CHECK_EQ(in_data.size(), 3); - CHECK_EQ(out_data.size(), 3); - CHECK_EQ(in_grad.size(), 3); - Stream *s = ctx.get_stream(); - Tensor data, grad, grad_in; - - if (in_data[batchnorm::kData].ndim() == 2) { - Shape<4> dshape = Shape4(out_grad[batchnorm::kOut].shape_[0], - out_grad[batchnorm::kOut].shape_[1], 1, 1); - data = mkl_experimental_direct_get_with_shape( - in_data[batchnorm::kData], dshape, s); - grad = mkl_experimental_direct_get_with_shape( - out_grad[batchnorm::kOut], dshape, s); - grad_in = mkl_experimental_direct_get_with_shape( - in_grad[batchnorm::kData], dshape, s); - } else { - data = mkl_experimental_direct_get(in_data[batchnorm::kData], s); - grad = mkl_experimental_direct_get(out_grad[batchnorm::kOut], s); - grad_in = mkl_experimental_direct_get(in_grad[batchnorm::kData], s); - } - - Tensor slope = in_data[batchnorm::kGamma].get(s); - Tensor gslope = in_grad[batchnorm::kGamma].get(s); - Tensor gbias = in_grad[batchnorm::kBeta].get(s); - Tensor mean = out_data[batchnorm::kMean].get(s); - Tensor var = out_data[batchnorm::kVar].get(s); - Tensor moving_mean = aux_states[batchnorm::kMovingMean].get(s); - Tensor moving_var = aux_states[batchnorm::kMovingVar].get(s); - - if (param_.fix_gamma) slope = 1.f; - - void* bottom_data = NULL; -#if MKL_EXPERIMENTAL == 1 - bottom_data = reinterpret_cast(mkl_prv_data(in_data[batchnorm::kData])); -#endif - if (NULL == bottom_data) - bottom_data = reinterpret_cast(data.dptr_); - - dnnError_t e; - void* BatchNorm_res[dnnResourceNumber]; - BatchNorm_res[dnnResourceSrc] = bottom_data; - BatchNorm_res[dnnResourceScaleShift] = scaleShift_space.dptr; - if (ctx.is_train && !param_.use_global_stats) { - int size = mean.size(0); // Tensor - float * moving_mean_ptr = reinterpret_cast(moving_mean.dptr_); - float * mean_ptr = reinterpret_cast(mean.dptr_); - float * moving_var_ptr = reinterpret_cast(moving_var.dptr_); - float * var_ptr = reinterpret_cast(var.dptr_); - float minus_mom = (1 - param_.momentum); - for (int i = 0; i < size; i++) { - moving_mean_ptr[i] = moving_mean_ptr[i] * param_.momentum - + mean_ptr[i] * minus_mom; - } - for (int i = 0; i < size; i++) { - moving_var_ptr[i] = moving_var_ptr[i] * param_.momentum - + var_ptr[i] * minus_mom; - } - BatchNorm_res[dnnResourceMean] = mean.dptr_; - BatchNorm_res[dnnResourceVariance] = var.dptr_; - } else { - BatchNorm_res[dnnResourceMean] = moving_mean.dptr_; - BatchNorm_res[dnnResourceVariance] = moving_var.dptr_; - } - - - BatchNorm_res[dnnResourceDiffSrc] = bwd_bottom_diff->get_output_ptr(grad_in.dptr_, - bwd_bottom_diff, in_grad[batchnorm::kData]); - BatchNorm_res[dnnResourceDiffDst] = bwd_top_diff->get_converted_prv(grad.dptr_, - true, out_grad[batchnorm::kOut]); - BatchNorm_res[dnnResourceDiffScaleShift] = scaleShiftDiff_space.dptr; - e = dnnExecute(batchNormBwdScaleShift, BatchNorm_res); - CHECK_EQ(e, E_SUCCESS); -#if MKL_EXPERIMENTAL == 0 - if (bwd_bottom_diff->conversion_needed()) { - bwd_bottom_diff->convert_from_prv(grad_in.dptr_); - } -#endif - DType * scaleShiftDiff_buf = reinterpret_cast(scaleShiftDiff_space.dptr); - if (!param_.fix_gamma) { - // Store ScaleShift blobs - DType* diff_scale = gslope.dptr_; - for (int i = 0; i < channels_; i++) { - diff_scale[i] = scaleShiftDiff_buf[i]; - } - } else { - int gslope_size = gslope.size(0); - float * gslope_ptr = reinterpret_cast(gslope.dptr_); - for (int i = 0; i < gslope_size; i++) { - *gslope_ptr++ = 0.0f; - } - } - DType* diff_shift = gbias.dptr_; - for (int i = 0; i < channels_; i++) { - diff_shift[i] = scaleShiftDiff_buf[channels_ + i]; - } - } - - private: - BatchNormParam param_; - DType eps_; - bool use_weight_bias_; - - int num_; - int channels_; - int height_; - int width_; - bool init_mkldnn_ = false; - std::shared_ptr > fwd_top_data; - std::shared_ptr > fwd_bottom_data; - std::shared_ptr > bwd_top_diff; - std::shared_ptr > bwd_bottom_diff; - dnnPrimitive_t batchNormFwdInference = NULL; - dnnPrimitive_t batchNormFwdTraining = NULL; - dnnPrimitive_t batchNormBwdScaleShift = NULL; - Storage::Handle scaleShift_space; - Storage::Handle scaleShiftDiff_space; - dnnLayout_t layout_usr_ = NULL; -}; // class BatchNormOp -} // namespace op -} // namespace mxnet -#endif // MXNET_OPERATOR_MKL_MKL_BATCH_NORM_INL_H_ diff --git a/src/operator/mkl/mkl_concat-inl.h b/src/operator/mkl/mkl_concat-inl.h deleted file mode 100644 index 1ed1e81d1303..000000000000 --- a/src/operator/mkl/mkl_concat-inl.h +++ /dev/null @@ -1,314 +0,0 @@ -/******************************************************************************* -* Copyright 2016 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -* -* \file mkl_concat-inl.h -* \brief -* \author lingyan.guo@intel.com -* zhenlin.luo@intel.com -* -*******************************************************************************/ -#ifndef MXNET_OPERATOR_MKL_MKL_CONCAT_INL_H_ -#define MXNET_OPERATOR_MKL_MKL_CONCAT_INL_H_ -#include -#include -#include -#include -#include -#include -#include -#include -#include "../operator_common.h" -#include "../channel_op_common.h" -#include "./mkl_util-inl.h" -namespace mxnet { -namespace op { - - -template -class MKLConcatOp : public Operator { - public: - static std::string getName() { - return "MKLConcatOp"; - } - explicit MKLConcatOp(ConcatParam param) - : size_(param.num_args), dimension_(param.dim), init_mkldnn_(false) { - concatFwd_ = static_cast(NULL); - concatBwd_ = static_cast(NULL); - fwd_top_data_ = MKLData::create(); - bwd_top_diff_ = MKLData::create(); - - num_concats_ = param.num_args; - } - virtual ~MKLConcatOp() { - dnnDelete(concatFwd_); - dnnDelete(concatBwd_); - } - - private: - void LayerSetUp(const std::vector > &data, - const mshadow::Tensor &out, - size_t data_shape_size, size_t *split_channels_) { - size_t dim_src = data_shape_size; - size_t dim_dst = dim_src; - num_concats_ = size_; - channels_ = 0; - - for (size_t i = 1; i < num_concats_; ++i) { - for (size_t j = 1; j < data_shape_size; ++j) { - if (j == dimension_) continue; - CHECK_EQ(data[0].shape_[j], data[i].shape_[j]); - } - } - - for (size_t i = 0; i < num_concats_; ++i) { - CHECK_EQ((int)dim_src, data[i].shape_.kDimension); - - fwd_bottom_data_.push_back(MKLData::create()); - bwd_bottom_diff_.push_back(MKLData::create()); - fwd_bottom_data_[i]->name = "fwd_bottom_data_[i]"; - bwd_bottom_diff_[i]->name = "bwd_bottom_data[i]"; - - size_t *sizes_src = new size_t[dim_src]; - size_t *strides_src = new size_t[dim_src]; - for (size_t d = 0; d < dim_src; ++d) { - sizes_src[d] = data[i].shape_[dim_src - d - 1]; - strides_src[d] = (d == 0) ? 1 : strides_src[d - 1] * sizes_src[d - 1]; - } - - split_channels_[i] = data[i].shape_[1]; - channels_ += split_channels_[i]; - fwd_bottom_data_[i]->create_user_layout(dim_src, sizes_src, strides_src); - bwd_bottom_diff_[i]->create_user_layout(dim_src, sizes_src, strides_src); - delete[] sizes_src; - delete[] strides_src; - } - size_t *sizes_dst = new size_t[dim_dst]; - size_t *strides_dst = new size_t[dim_dst]; - for (size_t d = 0; d < dim_dst; ++d) { - if (d == 2) - sizes_dst[d] = channels_; - else - sizes_dst[d] = data[0].shape_[dim_dst - 1 - d]; - strides_dst[d] = (d == 0) ? 1 : strides_dst[d - 1] * sizes_dst[d - 1]; - } - bwd_top_diff_->create_user_layout(dim_dst, sizes_dst, strides_dst); - fwd_top_data_->create_user_layout(dim_dst, sizes_dst, strides_dst); - delete[] sizes_dst; - delete[] strides_dst; - concatFwd_ = NULL; - concatBwd_ = NULL; - } - - public: - virtual void Forward(const OpContext &ctx, - const std::vector &in_data, - const std::vector &req, - const std::vector &out_data, - const std::vector &aux_args) { - using namespace mshadow; - using namespace mshadow::expr; - CHECK_EQ(static_cast(in_data.size()), size_); - CHECK_EQ(out_data.size(), 1); - CHECK_LT(dimension_, (size_t)in_data[concat_enum::kData0].ndim()); - Stream *s = ctx.get_stream(); - std::vector > data(size_); - Tensor out; - if (in_data[0].ndim() == 2) { - for (int i = 0; i < size_; ++i) { - Shape<4> dshape = Shape4(in_data[i].shape_[0], - in_data[i].shape_[1], 1, 1); - data[i] = mkl_experimental_direct_get_with_shape( - in_data[i], dshape, s); - } - Shape<4> dshape = Shape4(out_data[concat_enum::kOut].shape_[0], - out_data[concat_enum::kOut].shape_[1], 1, 1); - out = mkl_experimental_direct_get_with_shape( - out_data[concat_enum::kOut], dshape, s); - } else if (in_data[0].ndim() == 3) { - for (int i = 0; i < size_; ++i) { - Shape<4> dshape = Shape4(in_data[i].shape_[0], - in_data[i].shape_[1], in_data[i].shape_[2], 1); - data[i] = mkl_experimental_direct_get_with_shape( - in_data[i], dshape, s); - } - Shape<4> dshape = Shape4(out_data[concat_enum::kOut].shape_[0], - out_data[concat_enum::kOut].shape_[1], - out_data[concat_enum::kOut].shape_[2], 1); - out = mkl_experimental_direct_get_with_shape( - out_data[concat_enum::kOut], dshape, s); - } else { - for (int i = 0; i < size_; ++i) { - data[i] = mkl_experimental_direct_get(in_data[i], s); - } - out = mkl_experimental_direct_get(out_data[concat_enum::kOut], s); - } - size_t *split_channels_ = new size_t[num_concats_]; - if (!init_mkldnn_) { - init_mkldnn_ = true; - LayerSetUp(data, out, 4, split_channels_); - } - - dnnError_t e; - std::vector bottom_data; - bool isFirstPass = (concatFwd_ == NULL); - dnnLayout_t *layouts = NULL; - if (isFirstPass) { - layouts = new dnnLayout_t[num_concats_]; - } - - for (size_t i = 0; i < num_concats_; i++) { - void * bottom_i = NULL; -#if MKL_EXPERIMENTAL == 1 - bottom_i = mkl_prv_data(in_data[i]); - if (bottom_i != NULL) { - if (isFirstPass) { - std::shared_ptr > mem_descr = - mkl_get_mem_desc(in_data[i].Mkl_mem_); - fwd_bottom_data_[i] = mem_descr; - layouts[i] = mem_descr->layout_int; - } - } -#endif - if (bottom_i == NULL) { - bottom_i = data[i].dptr_; - if (isFirstPass) { - layouts[i] = fwd_bottom_data_[i]->layout_usr; - } - } - - bottom_data.push_back(reinterpret_cast(bottom_i)); - } - - if (isFirstPass) { - e = dnnConcatCreate(&concatFwd_, NULL, num_concats_, layouts); - CHECK_EQ(e, E_SUCCESS); - - fwd_top_data_->create_internal_layout(concatFwd_, dnnResourceDst); - bwd_top_diff_->create_internal_layout(concatFwd_, dnnResourceDst); - - e = dnnSplitCreate(&concatBwd_, NULL, num_concats_, - bwd_top_diff_->layout_int, split_channels_); - CHECK_EQ(e, E_SUCCESS); - - for (size_t n = 0; n < num_concats_; ++n) { - fwd_bottom_data_[n]->create_internal_layout(concatFwd_, - (dnnResourceType_t)(dnnResourceMultipleSrc + n)); - bwd_bottom_diff_[n]->create_internal_layout(concatBwd_, - (dnnResourceType_t)(dnnResourceMultipleDst + n)); - } - } - delete[] layouts; - - void *concat_res[dnnResourceNumber]; - for (size_t i = 0; i < num_concats_; ++i) { - concat_res[dnnResourceMultipleSrc + i] - = reinterpret_cast(bottom_data[i]); - } - - concat_res[dnnResourceDst] = fwd_top_data_->get_output_ptr(out.dptr_, - fwd_top_data_, out_data[concat_enum::kOut]); - e = dnnExecute(concatFwd_, concat_res); - CHECK_EQ(e, E_SUCCESS); - delete[] split_channels_; - } - - virtual void Backward(const OpContext &ctx, - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &req, - const std::vector &in_grad, - const std::vector &aux_states) { - using namespace mshadow; - using namespace mshadow::expr; - CHECK_EQ(out_grad.size(), 1); - CHECK_EQ(in_grad.size(), static_cast(size_)); - Stream *s = ctx.get_stream(); - std::vector > grad_in(size_); - Tensor grad; - if (in_grad[0].ndim() == 2) { - Shape<4> dshape = Shape4(out_grad[concat_enum::kOut].shape_[0], - out_grad[concat_enum::kOut].shape_[1], 1, 1); - grad = mkl_experimental_direct_get_with_shape( - out_grad[concat_enum::kOut], dshape, s); - for (int i = 0; i < size_; ++i) { - dshape = Shape4(in_grad[i].shape_[0], - in_grad[i].shape_[1], 1, 1); - grad_in[i] = mkl_experimental_direct_get_with_shape( - in_grad[i], dshape, s); - } - } else if (in_grad[0].ndim() == 3) { - Shape<4> dshape = Shape4(out_grad[concat_enum::kOut].shape_[0], - out_grad[concat_enum::kOut].shape_[1], - out_grad[concat_enum::kOut].shape_[2], 1); - grad = mkl_experimental_direct_get_with_shape( - out_grad[concat_enum::kOut], dshape, s); - for (int i = 0; i < size_; ++i) { - dshape = Shape4(in_grad[i].shape_[0], - in_grad[i].shape_[1], in_grad[i].shape_[2], 1); - grad_in[i] = mkl_experimental_direct_get_with_shape( - in_grad[i], dshape, s); - } - } else { - grad = mkl_experimental_direct_get(out_grad[concat_enum::kOut], s); - for (int i = 0; i < size_; ++i) { - grad_in[i] = mkl_experimental_direct_get(in_grad[i], s); - } - } - - int need_bwd = 0; - for (size_t n = 0; n < num_concats_; n++) { - need_bwd += req[n]; - } - if (!need_bwd) { - return; - } - - dnnError_t e; - void *concat_res[dnnResourceNumber]; - concat_res[dnnResourceSrc] = bwd_top_diff_->get_converted_prv(grad.dptr_, true, - out_grad[concat_enum::kOut]); - for (size_t i = 0; i < num_concats_; ++i) { - concat_res[dnnResourceMultipleDst + i] = bwd_bottom_diff_[i]->get_output_ptr( - grad_in[i].dptr_, bwd_bottom_diff_[i], in_grad[i]); - } - e = dnnExecute(concatBwd_, concat_res); - CHECK_EQ(e, E_SUCCESS); - } - - private: - int size_; - size_t dimension_; - - bool init_mkldnn_; - - dnnPrimitive_t concatFwd_; - dnnPrimitive_t concatBwd_; - std::shared_ptr > fwd_top_data_; - std::vector< std::shared_ptr > > fwd_bottom_data_; - std::shared_ptr > bwd_top_diff_; - std::vector< std::shared_ptr > > bwd_bottom_diff_; - - - size_t width_; - size_t height_; - size_t channels_; - size_t num_; - size_t num_concats_; -}; // class MKLConcatOp -} // namespace op -} // namespace mxnet - -#endif // MXNET_OPERATOR_MKL_MKL_CONCAT_INL_H_ diff --git a/src/operator/mkl/mkl_convolution-inl.h b/src/operator/mkl/mkl_convolution-inl.h deleted file mode 100644 index 813d061f172b..000000000000 --- a/src/operator/mkl/mkl_convolution-inl.h +++ /dev/null @@ -1,490 +0,0 @@ -/******************************************************************************* -* Copyright 2016 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -* -* \file mkl_convolution-inl.h -* \brief -* \author lingyan.guo@intel.com -* zhenlin.luo@intel.com -* -*******************************************************************************/ -#ifndef MXNET_OPERATOR_MKL_MKL_CONVOLUTION_INL_H_ -#define MXNET_OPERATOR_MKL_MKL_CONVOLUTION_INL_H_ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "../operator_common.h" -#include "../nn/convolution-inl.h" -#include "./mkl_util-inl.h" - -namespace mxnet { -namespace op { - -template -class MKLConvolutionOp : public Operator { - public: - static std::string getName() { - return "MKLConvolutionOp"; - } - void SetupBuffer() { - convolutionBwdBias = static_cast(NULL); - convolutionBwdFilter = static_cast(NULL); - convolutionBwdData = static_cast(NULL); - convolutionFwd = static_cast(NULL); - fwd_bottom_data = MKLData::create(); - fwd_top_data = MKLData::create(); - fwd_filter_data = MKLData::create(); - fwd_bias_data = MKLData::create(); - bwdd_top_diff = MKLData::create(); - bwdd_bottom_diff = MKLData::create(); - bwdd_filter_data = MKLData::create(); - bwdf_top_diff = MKLData::create(); - bwdf_filter_diff = MKLData::create(); - bwdf_bottom_data = MKLData::create(); - bwdb_top_diff = MKLData::create(); - bwdb_bias_diff = MKLData::create(); - // Names are for debugging purposes only. - fwd_bottom_data->name = "fwd_bottom_data @ " + this->getName(); - fwd_top_data->name = "fwd_top_data @ " + this->getName(); - fwd_filter_data->name = "fwd_filter_data @ " + this->getName(); - fwd_bias_data->name = "fwd_bias_data @ " + this->getName(); - bwdd_top_diff->name = "bwdd_top_diff @ " + this->getName(); - bwdd_bottom_diff->name = "bwdd_bottom_diff @ " + this->getName(); - bwdd_filter_data->name = "bwdd_filter_data @ " + this->getName(); - bwdf_top_diff->name = "bwdf_top_diff @ " + this->getName(); - bwdf_bottom_data->name = "bwdf_bottom_data @ " + this->getName(); - bwdf_filter_diff->name = "bwdf_filter_diff @ " + this->getName(); - bwdb_top_diff->name = "bwdb_top_diff @ " + this->getName(); - bwdb_bias_diff->name = "bwdb_bias_diff @ " + this->getName(); - } - - explicit MKLConvolutionOp(ConvolutionParam p): - convolutionFwd(NULL), - convolutionBwdData(static_cast(NULL)), - convolutionBwdFilter(static_cast(NULL)), - convolutionBwdBias(static_cast(NULL)) { - this->param_ = p; - init_mkldnn_ = false; - // convert MBytes first to Bytes and then to elements. - param_.workspace = (param_.workspace << 20) / sizeof(DType); - SetupBuffer(); - } - void ReleaseBuffer() { - if (convolutionFwd != NULL) { - dnnDelete(convolutionFwd); - convolutionFwd = NULL; - } - if (convolutionBwdData != NULL) { - dnnDelete(convolutionBwdData); - convolutionBwdData = NULL; - } - if (convolutionBwdFilter != NULL) { - dnnDelete(convolutionBwdFilter); - convolutionBwdFilter = NULL; - } - if (!param_.no_bias && convolutionBwdBias != NULL) { - dnnDelete(convolutionBwdBias); - convolutionBwdBias = NULL; - } - } - virtual ~MKLConvolutionOp() { - ReleaseBuffer(); - } - - private: - void LayerSetUp(const mshadow::Tensor &data, - const mshadow::Tensor &out) { - this->width_ = data.shape_[3]; - this->height_ = data.shape_[2]; - this->channels_ = data.shape_[1]; - this->num_ = data.shape_[0]; - this->group_ = param_.num_group; - this->width_out_ = out.shape_[3]; - this->height_out_ = out.shape_[2]; - int channel_out_ = out.shape_[1]; - this->num_output_ = channel_out_; - kernel_w_ = param_.kernel[1]; - kernel_h_ = param_.kernel[0]; - stride_w_ = param_.stride[1]; - stride_h_ = param_.stride[0]; - pad_w_ = param_.pad[1]; - pad_h_ = param_.pad[0]; - int status; - size_t n, g; - size_t iw, ih, ic; - size_t ow, oh, oc; - size_t kw, kh; - size_t dimension = 4; - g = std::max(this->group_, 1); - n = this->num_; - iw = this->width_; - ih = this->height_; - ic = this->channels_; - ow = this->width_out_; - oh = this->height_out_; - oc = this->num_output_; - kw = this->kernel_w_; - kh = this->kernel_h_; - oc = this->num_output_; - size_t bdata_sizes[4] = { iw, ih, ic, n }; - size_t bdata_strides[4] = { 1, iw, iw*ih, iw*ih*ic }; - /* starting with MKL 2017 Gold in case of groups filter layout - * becomes 5D, i.e. groups become a separate dimension */ - size_t g_mkl2017 = g; - size_t f_dimension = dimension + (g != 1); - if (getMKLBuildDate() < 20160701) { - g_mkl2017 = 1; - f_dimension = dimension; - } - size_t fdata_sizes[5] = { kw, kh, ic / g, oc / g_mkl2017, g_mkl2017 }; - size_t fdata_strides[5] = { 1, kw, kw*kh, kw*kh*ic / g, kw*kh*ic / g*oc / g }; - size_t bias_sizes[1] = { oc }; - size_t bias_strides[1] = { 1 }; - size_t tdata_sizes[4] = { ow, oh, oc, n }; - size_t tdata_strides[4] = { 1, ow, ow*oh, ow*oh*oc }; - size_t convolutionStrides[2] = { this->stride_w_, this->stride_h_ }; - int inputOffset[2] = { -this->pad_w_, -this->pad_h_ }; - // Names are for debugging purposes only. - /*** convolution section ***/ - if (!param_.no_bias) { - status = dnnGroupsConvolutionCreateForwardBias(&convolutionFwd, - NULL, - dnnAlgorithmConvolutionDirect, - g, - dimension, - bdata_sizes, - tdata_sizes, - fdata_sizes, - convolutionStrides, - inputOffset, - dnnBorderZeros); - } else { - status = dnnGroupsConvolutionCreateForward(&convolutionFwd, - NULL, - dnnAlgorithmConvolutionDirect, - g, - dimension, - bdata_sizes, - tdata_sizes, - fdata_sizes, - convolutionStrides, - inputOffset, - dnnBorderZeros); - } - CHECK_EQ(status, 0) - << "Failed dnnCreateConvolution(dnnForward) with status " - << status << "\n"; - fwd_bottom_data->create_layouts(convolutionFwd, dnnResourceSrc, dimension, - bdata_sizes, bdata_strides); - fwd_top_data->create_layouts(convolutionFwd, dnnResourceDst, dimension, - tdata_sizes, tdata_strides); - fwd_filter_data->create_layouts(convolutionFwd, dnnResourceFilter, - f_dimension, fdata_sizes, fdata_strides); - if (!param_.no_bias) - fwd_bias_data->create_layouts(convolutionFwd, dnnResourceBias, 1, - bias_sizes, bias_strides); - /* - * Backward by data layer setup - */ - status = dnnGroupsConvolutionCreateBackwardData(&convolutionBwdData, - NULL, - dnnAlgorithmConvolutionDirect, - g, - dimension, - bdata_sizes, - tdata_sizes, - fdata_sizes, - convolutionStrides, - inputOffset, - dnnBorderZeros); - CHECK_EQ(status, 0) - << "Failed dnnConvolutionCreateBackwardData with status " - << status << "\n"; - bwdd_bottom_diff->create_layouts(convolutionBwdData, dnnResourceDiffSrc, - dimension, bdata_sizes, bdata_strides); - bwdd_top_diff->create_layouts(convolutionBwdData, dnnResourceDiffDst, - dimension, tdata_sizes, tdata_strides); - bwdd_filter_data->create_layouts(convolutionBwdData, dnnResourceFilter, - f_dimension, fdata_sizes, fdata_strides); - /* - * Backward by filter layer setup - */ - status = dnnGroupsConvolutionCreateBackwardFilter(&convolutionBwdFilter, - NULL, - dnnAlgorithmConvolutionDirect, - g, - dimension, - bdata_sizes, - tdata_sizes, - fdata_sizes, - convolutionStrides, - inputOffset, - dnnBorderZeros); - CHECK_EQ(status, 0) - << "Failed dnnConvolutionCreateBackwardFilter with status " - << status << "\n"; - bwdf_bottom_data->create_layouts(convolutionBwdFilter, dnnResourceSrc, - dimension, bdata_sizes, bdata_strides); - bwdf_top_diff->create_layouts(convolutionBwdFilter, dnnResourceDiffDst, - dimension, tdata_sizes, tdata_strides); - bwdf_filter_diff->create_layouts(convolutionBwdFilter, dnnResourceDiffFilter, - f_dimension, fdata_sizes, fdata_strides); - /* - * Backward by bias layer setup - */ - if (!param_.no_bias) { - status = dnnGroupsConvolutionCreateBackwardBias(&convolutionBwdBias, - NULL, - dnnAlgorithmConvolutionDirect, - g, - dimension, - tdata_sizes); - CHECK_EQ(status, 0) - << "Failed dnnConvolutionCreateBackwardBias with status " - << status << "\n"; - bwdb_top_diff->create_layouts(convolutionBwdBias, dnnResourceDiffDst, - dimension, tdata_sizes, tdata_strides); - bwdb_bias_diff->create_layouts(convolutionBwdBias, dnnResourceDiffBias, 1, - bias_sizes, bias_strides); - } - } - - public: - virtual void Forward(const OpContext &ctx, - const std::vector &in_data, - const std::vector &req, - const std::vector &out_data, - const std::vector &aux_args) { - using namespace mshadow; - Stream *s = ctx.get_stream(); - DType *data_ptr = NULL; - DType *wmat_ptr = NULL; - DType *out_ptr = NULL; - Tensor data = - mkl_experimental_direct_get(in_data[conv::kData], s); - Tensor out = - mkl_experimental_direct_get(out_data[conv::kOut], s); - Tensor wmat = - mkl_experimental_direct_get(in_data[conv::kWeight], s); - if (!init_mkldnn_) { - LayerSetUp(data, out); - init_mkldnn_ = true; - } - CHECK_EQ(data.CheckContiguous(), true); - CHECK_EQ(wmat.CheckContiguous(), true); - CHECK_EQ(out.CheckContiguous(), true); - data_ptr = data.dptr_; - wmat_ptr = wmat.dptr_; - out_ptr = out.dptr_; - int status; - void *res_convolutionFwd[dnnResourceNumber]; - res_convolutionFwd[dnnResourceSrc] = - fwd_bottom_data->get_converted_prv(data_ptr, false, in_data[conv::kData]); - res_convolutionFwd[dnnResourceFilter] = - fwd_filter_data->get_converted_prv(wmat_ptr, true, in_data[conv::kWeight]); - if (!param_.no_bias) { - Tensor bias = - mkl_experimental_direct_get(in_data[conv::kBias], s); - res_convolutionFwd[dnnResourceBias] = - fwd_bias_data->get_converted_prv(bias.dptr_, true, in_data[conv::kBias]); - } - - res_convolutionFwd[dnnResourceDst] = fwd_top_data->get_output_ptr(out_ptr, - fwd_top_data, out_data[conv::kOut]); - status = dnnExecute(convolutionFwd, res_convolutionFwd); - CHECK_EQ(status, 0) << "Forward convolution failed with status " << status; -#if MKL_EXPERIMENTAL == 0 - if (fwd_top_data->conversion_needed()) { - fwd_top_data->convert_from_prv(out_ptr); - } -#endif - } - void AddToModeAllocAndStoreBuffer(void *src, int blob_size, Storage::Handle *pws) { - int blob_byte_size = blob_size * sizeof(DType); - *pws = Storage::Get()->Alloc(blob_byte_size, Context::CPU()); - memcpy(pws->dptr, src, blob_byte_size); - } - void AddToModeAddAndReleaseBuffer(Storage::Handle *pws, void *dst_, int blob_size) { - DType *dst = reinterpret_cast(dst_); - DType *src = reinterpret_cast(pws->dptr); -#pragma omp parallel for - for (int i = 0; i < blob_size; i++) { - dst[i] += src[i]; - } - if (pws->dptr) - Storage::Get()->Free(*pws); - pws->dptr = NULL; - } - virtual void Backward(const OpContext &ctx, - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &req, - const std::vector &in_grad, - const std::vector &aux_args) { - using namespace mshadow; - if (param_.kernel.ndim() > 2) { - LOG(FATAL) << "Volume convolution is not implmented in mshadow"; - } - CHECK_EQ(out_grad.size(), 1); - size_t expected = param_.no_bias == 0 ? 3 : 2; - CHECK(in_data.size() == expected && in_grad.size() == expected); - CHECK_EQ(req.size(), expected); - CHECK_EQ(in_data[conv::kWeight].CheckContiguous(), true); - Stream *s = ctx.get_stream(); - Tensor data = - mkl_experimental_direct_get(in_data[conv::kData], s); - Shape<3> wmat_shape = - Shape3(param_.num_group, - param_.num_filter / param_.num_group, - data.shape_[1] / param_.num_group * param_.kernel[0] * param_.kernel[1]); - Tensor wmat = - mkl_experimental_direct_get_with_shape( - in_data[conv::kWeight], wmat_shape, s); - Tensor grad = - mkl_experimental_direct_get(out_grad[conv::kOut], s); - Tensor gdata = - mkl_experimental_direct_get(in_grad[conv::kData], s); - Tensor gwmat = - mkl_experimental_direct_get_with_shape( - in_grad[conv::kWeight], wmat_shape, s); - - if (!init_mkldnn_) { - init_mkldnn_ = true; - LayerSetUp(data, grad); - } - int status; - if (req[0]) { - void *res_convolutionBwdData[dnnResourceNumber]; - res_convolutionBwdData[dnnResourceDiffDst] = - bwdd_top_diff->get_converted_prv(grad.dptr_, true, out_grad[conv::kOut]); - - res_convolutionBwdData[dnnResourceFilter] = - bwdd_filter_data->get_converted_prv(wmat.dptr_, false, in_data[conv::kWeight]); - Storage::Handle addtoWorkspace; - if (req[0] == kAddTo) { - // wait mkl support addto mode - AddToModeAllocAndStoreBuffer(gdata.dptr_, in_grad[conv::kData].Size(), &addtoWorkspace); - } - - res_convolutionBwdData[dnnResourceDiffSrc] = bwdd_bottom_diff->get_output_ptr(gdata.dptr_, - bwdd_bottom_diff, in_grad[conv::kData]); - status = dnnExecute(convolutionBwdData, res_convolutionBwdData); - CHECK_EQ(status, 0) << "Backward Data conv failed with status " << status; -#if MKL_EXPERIMENTAL == 0 - if (bwdd_bottom_diff->conversion_needed()) { - bwdd_bottom_diff->convert_from_prv(gdata.dptr_); - } -#endif - if (req[0] == kAddTo) { - if (bwdd_bottom_diff->conversion_needed()) { - bwdd_bottom_diff->convert_from_prv(gdata.dptr_); - } - AddToModeAddAndReleaseBuffer(&addtoWorkspace, gdata.dptr_, in_grad[conv::kData].Size()); - } - } - if (req[1]) { - void *res_convolutionBwdFilter[dnnResourceNumber]; - - res_convolutionBwdFilter[dnnResourceDiffDst] = - bwdf_top_diff->get_converted_prv(grad.dptr_, true, out_grad[conv::kOut]); - - res_convolutionBwdFilter[dnnResourceSrc] = - bwdf_bottom_data->get_converted_prv(data.dptr_, false, - in_data[conv::kData]); - Storage::Handle addtoWorkspace; - if (req[1] == kAddTo) { - // wait mkl support addto mode - AddToModeAllocAndStoreBuffer(gwmat.dptr_, in_grad[conv::kWeight].Size(), &addtoWorkspace); - } - - res_convolutionBwdFilter[dnnResourceDiffFilter] = bwdf_filter_diff->get_output_ptr( - gwmat.dptr_, bwdf_filter_diff, in_grad[conv::kWeight]); - status = dnnExecute(convolutionBwdFilter, res_convolutionBwdFilter); - CHECK_EQ(status, 0) << "Backward Filter conv failed with status " << status; -#if MKL_EXPERIMENTAL == 0 - if (bwdf_filter_diff->conversion_needed()) { - bwdf_filter_diff->convert_from_prv(gwmat.dptr_); - } -#endif - if (req[1] == kAddTo) { - if (bwdf_filter_diff->conversion_needed()) { - bwdf_filter_diff->convert_from_prv(gwmat.dptr_); - } - AddToModeAddAndReleaseBuffer(&addtoWorkspace, gwmat.dptr_, in_grad[conv::kWeight].Size()); - } - } - if (!param_.no_bias) { - Tensor gbias = - mkl_experimental_direct_get(in_grad[conv::kBias], s); - void *res_convolutionBwdBias[dnnResourceNumber]; - res_convolutionBwdBias[dnnResourceDiffDst] = - bwdb_top_diff->get_converted_prv(grad.dptr_, true, out_grad[conv::kOut]); - - res_convolutionBwdBias[dnnResourceDiffBias] = bwdb_bias_diff->get_output_ptr(gbias.dptr_, - bwdb_bias_diff, in_grad[conv::kBias]); - status = dnnExecute(convolutionBwdBias, res_convolutionBwdBias); - CHECK_EQ(status, 0) << "Backward Bias failed with status " << status; -#if MKL_EXPERIMENTAL == 0 - if (bwdb_bias_diff->conversion_needed()) { - bwdb_bias_diff->convert_from_prv(gbias.dptr_); - } -#endif - } - } - - private: - ConvolutionParam param_; - size_t width_, - height_, - width_out_, - height_out_, - kernel_w_, - kernel_h_, - stride_w_, - stride_h_; - int group_, - num_, - num_output_; - size_t channels_; - int pad_w_, - pad_h_; - bool init_mkldnn_; - dnnPrimitive_t convolutionFwd; - dnnPrimitive_t convolutionBwdData; - dnnPrimitive_t convolutionBwdFilter; - dnnPrimitive_t convolutionBwdBias; - /* Fwd step */ - std::shared_ptr > fwd_bottom_data, fwd_top_data, fwd_filter_data, - fwd_bias_data; - /* Bwd data step */ - std::shared_ptr > bwdd_top_diff, bwdd_bottom_diff; - std::shared_ptr > bwdd_filter_data; - /* Bwd filter step */ - std::shared_ptr > bwdf_top_diff, bwdf_filter_diff; - std::shared_ptr > bwdf_bottom_data; - std::shared_ptr > bwdf_filter_diff_iter, bwdf2fwd_filter_diff, - bwdb_bias_diff_iter; - /* Bwd bias step */ - std::shared_ptr > bwdb_top_diff, bwdb_bias_diff; -}; // class ConvolutionOp -} // namespace op -} // namespace mxnet -#endif // MXNET_OPERATOR_MKL_MKL_CONVOLUTION_INL_H_ diff --git a/src/operator/mkl/mkl_cppwrapper.cc b/src/operator/mkl/mkl_cppwrapper.cc deleted file mode 100644 index 507e5498c85b..000000000000 --- a/src/operator/mkl/mkl_cppwrapper.cc +++ /dev/null @@ -1,44 +0,0 @@ -/******************************************************************************* -* Copyright 2016 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -* -* \file mkl_cppwrapper.cc -* \brief -* \author lingyan.guo@intel.com -* zhenlin.luo@intel.com -* -*******************************************************************************/ - - - -#include "mkl_cppwrapper.h" -#include -#if MXNET_USE_MKL2017 == 1 -#include "mkl_service.h" - -int getMKLBuildDate() { - static int build = 0; - if (build == 0) { - MKLVersion v; - mkl_get_version(&v); - build = atoi(v.Build); - printf("MKL Build:%d\n", build); - } - return build; -} - -bool enableMKLWarnGenerated() { - return false; -} -#endif // MSHADOW_USE_MKL2017 diff --git a/src/operator/mkl/mkl_cppwrapper.h b/src/operator/mkl/mkl_cppwrapper.h deleted file mode 100644 index 7d66f20ad308..000000000000 --- a/src/operator/mkl/mkl_cppwrapper.h +++ /dev/null @@ -1,1020 +0,0 @@ -/******************************************************************************* -* Copyright 2016 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -* -* \file mkl_cppwrapper.h -* \brief -* \author lingyan.guo@intel.com -* zhenlin.luo@intel.com -* -*******************************************************************************/ -#ifndef MXNET_OPERATOR_MKL_MKL_CPPWRAPPER_H_ -#define MXNET_OPERATOR_MKL_MKL_CPPWRAPPER_H_ - - -#include -#include -#if MXNET_USE_MKL2017 == 1 -#include "mkl_dnn_types.h" -#include "mkl_dnn.h" -#include "mkl_version.h" - - -extern int getMKLBuildDate(); -extern bool enableMKLWarnGenerated(); - - -template inline dnnError_t dnnLayoutCreate( - dnnLayout_t *pLayout, size_t dimension, const size_t size[], const size_t strides[]); -template <> inline dnnError_t dnnLayoutCreate( - dnnLayout_t *pLayout, size_t dimension, const size_t size[], const size_t strides[]) { - return dnnLayoutCreate_F32(pLayout, dimension, size, strides); -} -template <> inline dnnError_t dnnLayoutCreate( - dnnLayout_t *pLayout, size_t dimension, const size_t size[], const size_t strides[]) { - return dnnLayoutCreate_F64(pLayout, dimension, size, strides); -} - -template inline dnnError_t dnnLayoutCreateFromPrimitive( - dnnLayout_t *pLayout, const dnnPrimitive_t primitive, dnnResourceType_t type); -template <> inline dnnError_t dnnLayoutCreateFromPrimitive( - dnnLayout_t *pLayout, const dnnPrimitive_t primitive, dnnResourceType_t type) { - return dnnLayoutCreateFromPrimitive_F32(pLayout, primitive, type); -} -template <> inline dnnError_t dnnLayoutCreateFromPrimitive( - dnnLayout_t *pLayout, const dnnPrimitive_t primitive, dnnResourceType_t type) { - return dnnLayoutCreateFromPrimitive_F64(pLayout, primitive, type); -} - -template inline size_t dnnLayoutGetMemorySize( - const dnnLayout_t layout); -template <> inline size_t dnnLayoutGetMemorySize( - const dnnLayout_t layout) { - return dnnLayoutGetMemorySize_F32(layout); -} -template <> inline size_t dnnLayoutGetMemorySize( - const dnnLayout_t layout) { - return dnnLayoutGetMemorySize_F64(layout); -} - -template inline int dnnLayoutCompare( - const dnnLayout_t l1, const dnnLayout_t l2); -template <> inline int dnnLayoutCompare( - const dnnLayout_t l1, const dnnLayout_t l2) { - return dnnLayoutCompare_F32(l1, l2); -} -template <> inline int dnnLayoutCompare( - const dnnLayout_t l1, const dnnLayout_t l2) { - return dnnLayoutCompare_F64(l1, l2); -} - - -template inline dnnError_t dnnAllocateBuffer( - void **pPtr, dnnLayout_t layout); -template <> inline dnnError_t dnnAllocateBuffer( - void **pPtr, dnnLayout_t layout) { - return dnnAllocateBuffer_F32(pPtr, layout); -} -template <> inline dnnError_t dnnAllocateBuffer( - void **pPtr, dnnLayout_t layout) { - return dnnAllocateBuffer_F64(pPtr, layout); -} - -template inline dnnError_t dnnReleaseBuffer( - void *ptr); -template <> inline dnnError_t dnnReleaseBuffer( - void *ptr) { - return dnnReleaseBuffer_F32(ptr); -} -template <> inline dnnError_t dnnReleaseBuffer( - void *ptr) { - return dnnReleaseBuffer_F64(ptr); -} - -template inline dnnError_t dnnLayoutDelete( - dnnLayout_t layout); -template <> inline dnnError_t dnnLayoutDelete( - dnnLayout_t layout) { - return dnnLayoutDelete_F32(layout); -} -template <> inline dnnError_t dnnLayoutDelete( - dnnLayout_t layout) { - return dnnLayoutDelete_F64(layout); -} - -template inline dnnError_t dnnPrimitiveAttributesCreate( - dnnPrimitiveAttributes_t *attributes); -template <> inline dnnError_t dnnPrimitiveAttributesCreate( - dnnPrimitiveAttributes_t *attributes) { - return dnnPrimitiveAttributesCreate_F32(attributes); -} -template <> inline dnnError_t dnnPrimitiveAttributesCreate( - dnnPrimitiveAttributes_t *attributes) { - return dnnPrimitiveAttributesCreate_F64(attributes); -} - - -template inline dnnError_t dnnPrimitiveAttributesDestroy( - dnnPrimitiveAttributes_t attributes); -template <> inline dnnError_t dnnPrimitiveAttributesDestroy( - dnnPrimitiveAttributes_t attributes) { - return dnnPrimitiveAttributesDestroy_F32(attributes); -} -template <> inline dnnError_t dnnPrimitiveAttributesDestroy( - dnnPrimitiveAttributes_t attributes) { - return dnnPrimitiveAttributesDestroy_F64(attributes); -} - -template inline dnnError_t dnnPrimitiveGetAttributes( - dnnPrimitive_t primitive, - dnnPrimitiveAttributes_t *attributes); -template <> inline dnnError_t dnnPrimitiveGetAttributes( - dnnPrimitive_t primitive, - dnnPrimitiveAttributes_t *attributes) { - return dnnPrimitiveGetAttributes_F32(primitive, attributes); -} -template <> inline dnnError_t dnnPrimitiveGetAttributes( - dnnPrimitive_t primitive, - dnnPrimitiveAttributes_t *attributes) { - return dnnPrimitiveGetAttributes_F64(primitive, attributes); -} - -template inline dnnError_t dnnExecute( - dnnPrimitive_t primitive, void *resources[]); -template <> inline dnnError_t dnnExecute( - dnnPrimitive_t primitive, void *resources[]) { - return dnnExecute_F32(primitive, resources); -} -template <> inline dnnError_t dnnExecute( - dnnPrimitive_t primitive, void *resources[]) { - return dnnExecute_F64(primitive, resources); -} - -template inline dnnError_t dnnExecuteAsync( - dnnPrimitive_t primitive, void *resources[]); -template <> inline dnnError_t dnnExecuteAsync( - dnnPrimitive_t primitive, void *resources[]) { - return dnnExecuteAsync_F32(primitive, resources); -} -template <> inline dnnError_t dnnExecuteAsync( - dnnPrimitive_t primitive, void *resources[]) { - return dnnExecuteAsync_F64(primitive, resources); -} - -template inline dnnError_t dnnWaitFor( - dnnPrimitive_t primitive); -template <> inline dnnError_t dnnWaitFor( - dnnPrimitive_t primitive) { - return dnnWaitFor_F32(primitive); -} -template <> inline dnnError_t dnnWaitFor( - dnnPrimitive_t primitive) { - return dnnWaitFor_F64(primitive); -} - -template inline dnnError_t dnnDelete( - dnnPrimitive_t primitive); -template <> inline dnnError_t dnnDelete( - dnnPrimitive_t primitive) { - return dnnDelete_F32(primitive); -} -template <> inline dnnError_t dnnDelete( - dnnPrimitive_t primitive) { - return dnnDelete_F64(primitive); -} - - -template inline dnnError_t dnnConversionCreate( - dnnPrimitive_t* pConversion, const dnnLayout_t from, const dnnLayout_t to); -template <> inline dnnError_t dnnConversionCreate( - dnnPrimitive_t* pConversion, const dnnLayout_t from, const dnnLayout_t to) { - return dnnConversionCreate_F32(pConversion, from, to); -} -template <> inline dnnError_t dnnConversionCreate( - dnnPrimitive_t* pConversion, const dnnLayout_t from, const dnnLayout_t to) { - return dnnConversionCreate_F64(pConversion, from, to); -} - - -template inline dnnError_t dnnConversionExecute( - dnnPrimitive_t conversion, void *from, void *to); -template <> inline dnnError_t dnnConversionExecute( - dnnPrimitive_t conversion, void *from, void *to) { - return dnnConversionExecute_F32(conversion, from, to); -} -template <> inline dnnError_t dnnConversionExecute( - dnnPrimitive_t conversion, void *from, void *to) { - return dnnConversionExecute_F64(conversion, from, to); -} - - -template inline dnnError_t dnnConvolutionCreateForward( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t dimension, const size_t srcSize[], const size_t dstSize[], const size_t filterSize[], - const size_t convolutionStrides[], const int inputOffset[], const dnnBorder_t border_type); -template <> inline dnnError_t dnnConvolutionCreateForward( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t dimension, const size_t srcSize[], const size_t dstSize[], const size_t filterSize[], - const size_t convolutionStrides[], const int inputOffset[], const dnnBorder_t border_type) { - return dnnConvolutionCreateForward_F32( - pConvolution, - attributes, - algorithm, - dimension, srcSize, dstSize, filterSize, - convolutionStrides, inputOffset, border_type); -} - -template <> inline dnnError_t dnnConvolutionCreateForward( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t dimension, const size_t srcSize[], const size_t dstSize[], const size_t filterSize[], - const size_t convolutionStrides[], const int inputOffset[], const dnnBorder_t border_type) { - return dnnConvolutionCreateForward_F64( - pConvolution, - attributes, - algorithm, - dimension, srcSize, dstSize, filterSize, - convolutionStrides, inputOffset, border_type); -} - - -template inline dnnError_t dnnConvolutionCreateForwardBias( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t dimension, const size_t srcSize[], const size_t dstSize[], const size_t filterSize[], - const size_t convolutionStrides[], const int inputOffset[], const dnnBorder_t border_type); -template <> inline dnnError_t dnnConvolutionCreateForwardBias( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t dimension, const size_t srcSize[], const size_t dstSize[], const size_t filterSize[], - const size_t convolutionStrides[], const int inputOffset[], const dnnBorder_t border_type) { - return dnnConvolutionCreateForwardBias_F32( - pConvolution, - attributes, - algorithm, - dimension, srcSize, dstSize, filterSize, - convolutionStrides, inputOffset, border_type); -} -template <> inline dnnError_t dnnConvolutionCreateForwardBias( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t dimension, const size_t srcSize[], const size_t dstSize[], const size_t filterSize[], - const size_t convolutionStrides[], const int inputOffset[], const dnnBorder_t border_type) { - return dnnConvolutionCreateForwardBias_F64( - pConvolution, - attributes, - algorithm, - dimension, srcSize, dstSize, filterSize, - convolutionStrides, inputOffset, border_type); -} - - -template inline dnnError_t dnnConvolutionCreateBackwardData( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t dimension, const size_t srcSize[], - const size_t dstSize[], const size_t filterSize[], - const size_t convolutionStrides[], const int inputOffset[], const dnnBorder_t border_type); -template <> inline dnnError_t dnnConvolutionCreateBackwardData( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t dimension, const size_t srcSize[], - const size_t dstSize[], const size_t filterSize[], - const size_t convolutionStrides[], const int inputOffset[], const dnnBorder_t border_type) { - return dnnConvolutionCreateBackwardData_F32( - pConvolution, - attributes, - algorithm, - dimension, srcSize, dstSize, filterSize, - convolutionStrides, inputOffset, border_type); -} -template <> inline dnnError_t dnnConvolutionCreateBackwardData( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t dimension, const size_t srcSize[], - const size_t dstSize[], const size_t filterSize[], - const size_t convolutionStrides[], const int inputOffset[], const dnnBorder_t border_type) { - return dnnConvolutionCreateBackwardData_F64( - pConvolution, - attributes, - algorithm, - dimension, srcSize, dstSize, filterSize, - convolutionStrides, inputOffset, border_type); -} - -template inline dnnError_t dnnConvolutionCreateBackwardFilter( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t dimension, const size_t srcSize[], const size_t dstSize[], const size_t filterSize[], - const size_t convolutionStrides[], const int inputOffset[], const dnnBorder_t border_type); -template <> inline dnnError_t dnnConvolutionCreateBackwardFilter( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t dimension, const size_t srcSize[], const size_t dstSize[], const size_t filterSize[], - const size_t convolutionStrides[], const int inputOffset[], const dnnBorder_t border_type) { - return dnnConvolutionCreateBackwardFilter_F32( - pConvolution, - attributes, - algorithm, - dimension, srcSize, dstSize, filterSize, - convolutionStrides, inputOffset, border_type); -} -template <> inline dnnError_t dnnConvolutionCreateBackwardFilter( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t dimension, const size_t srcSize[], const size_t dstSize[], const size_t filterSize[], - const size_t convolutionStrides[], const int inputOffset[], const dnnBorder_t border_type) { - return dnnConvolutionCreateBackwardFilter_F64( - pConvolution, - attributes, - algorithm, - dimension, srcSize, dstSize, filterSize, - convolutionStrides, inputOffset, border_type); -} - -template inline dnnError_t dnnConvolutionCreateBackwardBias( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t dimension, const size_t dstSize[]); -template <> inline dnnError_t dnnConvolutionCreateBackwardBias( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t dimension, const size_t dstSize[]) { - return dnnConvolutionCreateBackwardBias_F32( - pConvolution, - attributes, - algorithm, - dimension, dstSize); -} -template <> inline dnnError_t dnnConvolutionCreateBackwardBias( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t dimension, const size_t dstSize[]) { - return dnnConvolutionCreateBackwardBias_F64( - pConvolution, - attributes, - algorithm, - dimension, dstSize); -} - -template inline dnnError_t dnnGroupsConvolutionCreateForward( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t groups, size_t dimension, const size_t srcSize[], - const size_t dstSize[], const size_t filterSize[], - const size_t convolutionStrides[], const int inputOffset[], const dnnBorder_t border_type); -template <> inline dnnError_t dnnGroupsConvolutionCreateForward( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t groups, size_t dimension, const size_t srcSize[], - const size_t dstSize[], const size_t filterSize[], - const size_t convolutionStrides[], const int inputOffset[], const dnnBorder_t border_type) { - return dnnGroupsConvolutionCreateForward_F32( - pConvolution, - attributes, - algorithm, - groups, dimension, srcSize, dstSize, filterSize, - convolutionStrides, inputOffset, border_type); -} -template <> inline dnnError_t dnnGroupsConvolutionCreateForward( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t groups, size_t dimension, const size_t srcSize[], - const size_t dstSize[], const size_t filterSize[], - const size_t convolutionStrides[], const int inputOffset[], const dnnBorder_t border_type) { - return dnnGroupsConvolutionCreateForward_F64( - pConvolution, - attributes, - algorithm, - groups, dimension, srcSize, dstSize, filterSize, - convolutionStrides, inputOffset, border_type); -} - -template inline dnnError_t dnnGroupsConvolutionCreateForwardBias( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t groups, size_t dimension, const size_t srcSize[], - const size_t dstSize[], const size_t filterSize[], - const size_t convolutionStrides[], const int inputOffset[], const dnnBorder_t border_type); -template <> inline dnnError_t dnnGroupsConvolutionCreateForwardBias( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t groups, size_t dimension, const size_t srcSize[], - const size_t dstSize[], const size_t filterSize[], - const size_t convolutionStrides[], const int inputOffset[], const dnnBorder_t border_type) { - return dnnGroupsConvolutionCreateForwardBias_F32( - pConvolution, - attributes, - algorithm, - groups, dimension, srcSize, dstSize, filterSize, - convolutionStrides, inputOffset, border_type); -} -template <> inline dnnError_t dnnGroupsConvolutionCreateForwardBias( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t groups, size_t dimension, const size_t srcSize[], - const size_t dstSize[], const size_t filterSize[], - const size_t convolutionStrides[], const int inputOffset[], const dnnBorder_t border_type) { - return dnnGroupsConvolutionCreateForwardBias_F64( - pConvolution, - attributes, - algorithm, - groups, dimension, srcSize, dstSize, filterSize, - convolutionStrides, inputOffset, border_type); -} - -template inline dnnError_t dnnGroupsConvolutionCreateBackwardData( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t groups, size_t dimension, const size_t srcSize[], - const size_t dstSize[], const size_t filterSize[], - const size_t convolutionStrides[], const int inputOffset[], const dnnBorder_t border_type); -template <> inline dnnError_t dnnGroupsConvolutionCreateBackwardData( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t groups, size_t dimension, const size_t srcSize[], - const size_t dstSize[], const size_t filterSize[], - const size_t convolutionStrides[], const int inputOffset[], const dnnBorder_t border_type) { - return dnnGroupsConvolutionCreateBackwardData_F32( - pConvolution, - attributes, - algorithm, - groups, dimension, srcSize, dstSize, filterSize, - convolutionStrides, inputOffset, border_type); -} -template <> inline dnnError_t dnnGroupsConvolutionCreateBackwardData( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t groups, size_t dimension, const size_t srcSize[], - const size_t dstSize[], const size_t filterSize[], - const size_t convolutionStrides[], const int inputOffset[], const dnnBorder_t border_type) { - return dnnGroupsConvolutionCreateBackwardData_F64( - pConvolution, - attributes, - algorithm, - groups, dimension, srcSize, dstSize, filterSize, - convolutionStrides, inputOffset, border_type); -} - - -template inline dnnError_t dnnGroupsConvolutionCreateBackwardFilter( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t groups, size_t dimension, const size_t srcSize[], - const size_t dstSize[], const size_t filterSize[], - const size_t convolutionStrides[], const int inputOffset[], const dnnBorder_t border_type); -template <> inline dnnError_t dnnGroupsConvolutionCreateBackwardFilter( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t groups, size_t dimension, const size_t srcSize[], - const size_t dstSize[], const size_t filterSize[], - const size_t convolutionStrides[], const int inputOffset[], const dnnBorder_t border_type) { - return dnnGroupsConvolutionCreateBackwardFilter_F32( - pConvolution, - attributes, - algorithm, - groups, dimension, srcSize, dstSize, filterSize, - convolutionStrides, inputOffset, border_type); -} -template <> inline dnnError_t dnnGroupsConvolutionCreateBackwardFilter( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t groups, size_t dimension, const size_t srcSize[], - const size_t dstSize[], const size_t filterSize[], - const size_t convolutionStrides[], const int inputOffset[], const dnnBorder_t border_type) { - return dnnGroupsConvolutionCreateBackwardFilter_F64( - pConvolution, - attributes, - algorithm, - groups, dimension, srcSize, dstSize, filterSize, - convolutionStrides, inputOffset, border_type); -} - -template inline dnnError_t dnnGroupsConvolutionCreateBackwardBias( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t groups, size_t dimension, const size_t dstSize[]); -template <> inline dnnError_t dnnGroupsConvolutionCreateBackwardBias( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t groups, size_t dimension, const size_t dstSize[]) { - return dnnGroupsConvolutionCreateBackwardBias_F32( - pConvolution, - attributes, - algorithm, - groups, dimension, dstSize); -} -template <> inline dnnError_t dnnGroupsConvolutionCreateBackwardBias( - dnnPrimitive_t* pConvolution, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t algorithm, - size_t groups, size_t dimension, const size_t dstSize[]) { - return dnnGroupsConvolutionCreateBackwardBias_F64( - pConvolution, - attributes, - algorithm, - groups, dimension, dstSize); -} - -template inline dnnError_t dnnReLUCreateForward( - dnnPrimitive_t* pRelu, - dnnPrimitiveAttributes_t attributes, - const dnnLayout_t dataLayout, float negativeSlope); -template <> inline dnnError_t dnnReLUCreateForward( - dnnPrimitive_t* pRelu, - dnnPrimitiveAttributes_t attributes, - const dnnLayout_t dataLayout, float negativeSlope) { - return dnnReLUCreateForward_F32( - pRelu, - attributes, - dataLayout, negativeSlope); -} -template <> inline dnnError_t dnnReLUCreateForward( - dnnPrimitive_t* pRelu, - dnnPrimitiveAttributes_t attributes, - const dnnLayout_t dataLayout, float negativeSlope) { - return dnnReLUCreateForward_F64( - pRelu, - attributes, - dataLayout, negativeSlope); -} - -template inline dnnError_t dnnReLUCreateBackward( - dnnPrimitive_t* pRelu, - dnnPrimitiveAttributes_t attributes, - const dnnLayout_t diffLayout, const dnnLayout_t dataLayout, float negativeSlope); -template <> inline dnnError_t dnnReLUCreateBackward( - dnnPrimitive_t* pRelu, - dnnPrimitiveAttributes_t attributes, - const dnnLayout_t diffLayout, const dnnLayout_t dataLayout, float negativeSlope) { - return dnnReLUCreateBackward_F32( - pRelu, - attributes, - diffLayout, dataLayout, negativeSlope); -} -template <> inline dnnError_t dnnReLUCreateBackward( - dnnPrimitive_t* pRelu, - dnnPrimitiveAttributes_t attributes, - const dnnLayout_t diffLayout, const dnnLayout_t dataLayout, float negativeSlope) { - return dnnReLUCreateBackward_F64( - pRelu, - attributes, - diffLayout, dataLayout, negativeSlope); -} - -template inline dnnError_t dnnLRNCreateForward( - dnnPrimitive_t* pLrn, - dnnPrimitiveAttributes_t attributes, - const dnnLayout_t dataLayout, size_t kernel_size, float alpha, float beta, float k); -template <> inline dnnError_t dnnLRNCreateForward( - dnnPrimitive_t* pLrn, - dnnPrimitiveAttributes_t attributes, - const dnnLayout_t dataLayout, size_t kernel_size, float alpha, float beta, float k) { - return dnnLRNCreateForward_F32( - pLrn, - attributes, - dataLayout, kernel_size, alpha, beta, k); -} -template <> inline dnnError_t dnnLRNCreateForward( - dnnPrimitive_t* pLrn, - dnnPrimitiveAttributes_t attributes, - const dnnLayout_t dataLayout, size_t kernel_size, float alpha, float beta, float k) { - return dnnLRNCreateForward_F64( - pLrn, - attributes, - dataLayout, kernel_size, alpha, beta, k); -} - - -template inline dnnError_t dnnLRNCreateBackward( - dnnPrimitive_t* pLrn, - dnnPrimitiveAttributes_t attributes, - const dnnLayout_t diffLayout, const dnnLayout_t dataLayout, - size_t kernel_size, float alpha, float beta, float k); -template <> inline dnnError_t dnnLRNCreateBackward( - dnnPrimitive_t* pLrn, - dnnPrimitiveAttributes_t attributes, - const dnnLayout_t diffLayout, const dnnLayout_t dataLayout, - size_t kernel_size, float alpha, float beta, float k) { - return dnnLRNCreateBackward_F32( - pLrn, - attributes, - diffLayout, dataLayout, kernel_size, alpha, beta, k); -} -template <> inline dnnError_t dnnLRNCreateBackward( - dnnPrimitive_t* pLrn, - dnnPrimitiveAttributes_t attributes, - const dnnLayout_t diffLayout, const dnnLayout_t dataLayout, - size_t kernel_size, float alpha, float beta, float k) { - return dnnLRNCreateBackward_F64( - pLrn, - attributes, - diffLayout, dataLayout, kernel_size, alpha, beta, k); -} - - -template inline dnnError_t dnnPoolingCreateForward( - dnnPrimitive_t* pPooling, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t op, - const dnnLayout_t srcLayout, - const size_t kernelSize[], const size_t kernelStride[], - const int inputOffset[], const dnnBorder_t border_type); -template <> inline dnnError_t dnnPoolingCreateForward( - dnnPrimitive_t* pPooling, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t op, - const dnnLayout_t srcLayout, - const size_t kernelSize[], const size_t kernelStride[], - const int inputOffset[], const dnnBorder_t border_type) { - return dnnPoolingCreateForward_F32( - pPooling, - attributes, - op, - srcLayout, - kernelSize, kernelStride, - inputOffset, border_type); -} -template <> inline dnnError_t dnnPoolingCreateForward( - dnnPrimitive_t* pPooling, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t op, - const dnnLayout_t srcLayout, - const size_t kernelSize[], const size_t kernelStride[], - const int inputOffset[], const dnnBorder_t border_type) { - return dnnPoolingCreateForward_F64( - pPooling, - attributes, - op, - srcLayout, - kernelSize, kernelStride, - inputOffset, border_type); -} - - -template inline dnnError_t dnnPoolingCreateBackward( - dnnPrimitive_t* pPooling, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t op, - const dnnLayout_t srcLayout, - const size_t kernelSize[], const size_t kernelStride[], - const int inputOffset[], const dnnBorder_t border_type); -template <> inline dnnError_t dnnPoolingCreateBackward( - dnnPrimitive_t* pPooling, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t op, - const dnnLayout_t srcLayout, - const size_t kernelSize[], const size_t kernelStride[], - const int inputOffset[], const dnnBorder_t border_type) { - return dnnPoolingCreateBackward_F32( - pPooling, - attributes, - op, - srcLayout, - kernelSize, kernelStride, - inputOffset, border_type); -} -template <> inline dnnError_t dnnPoolingCreateBackward( - dnnPrimitive_t* pPooling, - dnnPrimitiveAttributes_t attributes, - dnnAlgorithm_t op, - const dnnLayout_t srcLayout, - const size_t kernelSize[], const size_t kernelStride[], - const int inputOffset[], const dnnBorder_t border_type) { - return dnnPoolingCreateBackward_F64( - pPooling, - attributes, - op, - srcLayout, - kernelSize, kernelStride, - inputOffset, border_type); -} - -template inline dnnError_t dnnConcatCreate( - dnnPrimitive_t *pConcat, - dnnPrimitiveAttributes_t attributes, - const size_t N, - dnnLayout_t src[]); -template <> inline dnnError_t dnnConcatCreate( - dnnPrimitive_t *pConcat, - dnnPrimitiveAttributes_t attributes, - const size_t N, - dnnLayout_t src[]) { - return dnnConcatCreate_F32( - pConcat, - attributes, - N, - src); -} -template <> inline dnnError_t dnnConcatCreate( - dnnPrimitive_t *pConcat, - dnnPrimitiveAttributes_t attributes, - const size_t N, - dnnLayout_t src[]) { - return dnnConcatCreate_F64( - pConcat, - attributes, - N, - src); -} - - -template inline dnnError_t dnnSplitCreate( - dnnPrimitive_t *pSplit, - dnnPrimitiveAttributes_t attributes, - const size_t N, - dnnLayout_t src, - size_t dst[]); -template <> inline dnnError_t dnnSplitCreate( - dnnPrimitive_t *pSplit, - dnnPrimitiveAttributes_t attributes, - const size_t N, - dnnLayout_t src, - size_t dst[]) { - return dnnSplitCreate_F32( - pSplit, - attributes, - N, - src, - dst); -} -template <> inline dnnError_t dnnSplitCreate( - dnnPrimitive_t *pSplit, - dnnPrimitiveAttributes_t attributes, - const size_t N, - dnnLayout_t src, - size_t dst[]) { - return dnnSplitCreate_F64( - pSplit, - attributes, - N, - src, - dst); -} - -template inline dnnError_t dnnSumCreate( - dnnPrimitive_t *pSum, - dnnPrimitiveAttributes_t attributes, - const size_t nSummands, dnnLayout_t layout, Dtype *coefficients); -template <> inline dnnError_t dnnSumCreate( - dnnPrimitive_t *pSum, - dnnPrimitiveAttributes_t attributes, - const size_t nSummands, dnnLayout_t layout, float *coefficients) { - return dnnSumCreate_F32( - pSum, - attributes, - nSummands, - layout, coefficients); -} -template <> inline dnnError_t dnnSumCreate( - dnnPrimitive_t *pSum, - dnnPrimitiveAttributes_t attributes, - const size_t nSummands, dnnLayout_t layout, double *coefficients) { - return dnnSumCreate_F64( - pSum, - attributes, - nSummands, - layout, coefficients); -} - -template inline dnnError_t dnnBatchNormalizationCreateForward_v2( - dnnPrimitive_t* pBatchNormalization, - dnnPrimitiveAttributes_t attributes, - const dnnLayout_t dataLayout, float eps, - int flags); - -template <> inline dnnError_t dnnBatchNormalizationCreateForward_v2( - dnnPrimitive_t* pBatchNormalization, - dnnPrimitiveAttributes_t attributes, - const dnnLayout_t dataLayout, float eps, - int flags) { - return dnnBatchNormalizationCreateForward_v2_F32( - pBatchNormalization, - attributes, - dataLayout, eps, flags); -} -template <> inline dnnError_t dnnBatchNormalizationCreateForward_v2( - dnnPrimitive_t* pBatchNormalization, - dnnPrimitiveAttributes_t attributes, - const dnnLayout_t dataLayout, float eps, - int flags) { - return dnnBatchNormalizationCreateForward_v2_F64( - pBatchNormalization, - attributes, - dataLayout, eps, flags); -} - - -template inline dnnError_t dnnBatchNormalizationCreateBackward_v2( - dnnPrimitive_t* pBatchNormalization, - dnnPrimitiveAttributes_t attributes, - const dnnLayout_t dataLayout, float eps, - int flags); - -template <> inline dnnError_t dnnBatchNormalizationCreateBackward_v2( - dnnPrimitive_t* pBatchNormalization, - dnnPrimitiveAttributes_t attributes, - const dnnLayout_t dataLayout, float eps, - int flags) { - return dnnBatchNormalizationCreateBackward_v2_F32( - pBatchNormalization, - attributes, - dataLayout, eps, flags); -} - -template <> inline dnnError_t dnnBatchNormalizationCreateBackward_v2( - dnnPrimitive_t* pBatchNormalization, - dnnPrimitiveAttributes_t attributes, - const dnnLayout_t dataLayout, float eps, - int flags) { - return dnnBatchNormalizationCreateBackward_v2_F64( - pBatchNormalization, - attributes, - dataLayout, eps, flags); -} - -template inline dnnError_t dnnInnerProductCreateForward( - dnnPrimitive_t *pInnerProduct, - dnnPrimitiveAttributes_t attributes, - size_t dimensions, - const size_t srcSize[], - size_t outputChannels); -template <> inline dnnError_t dnnInnerProductCreateForward( - dnnPrimitive_t *pInnerProduct, - dnnPrimitiveAttributes_t attributes, - size_t dimensions, - const size_t srcSize[], - size_t outputChannels) { - return dnnInnerProductCreateForward_F32(pInnerProduct, - attributes, dimensions, - srcSize, outputChannels); -} -template <> inline dnnError_t dnnInnerProductCreateForward( - dnnPrimitive_t *pInnerProduct, - dnnPrimitiveAttributes_t attributes, - size_t dimensions, - const size_t srcSize[], - size_t outputChannels) { - return dnnInnerProductCreateForward_F64(pInnerProduct, - attributes, dimensions, - srcSize, outputChannels); -} - -template inline dnnError_t dnnInnerProductCreateForwardBias( - dnnPrimitive_t *pInnerProduct, - dnnPrimitiveAttributes_t attributes, - size_t dimensions, - const size_t srcSize[], - size_t outputChannels); - -template <> inline dnnError_t dnnInnerProductCreateForwardBias( - dnnPrimitive_t *pInnerProduct, - dnnPrimitiveAttributes_t attributes, - size_t dimensions, - const size_t srcSize[], - size_t outputChannels) { - return dnnInnerProductCreateForwardBias_F32(pInnerProduct, - attributes, dimensions, - srcSize, outputChannels); -} -template <> inline dnnError_t dnnInnerProductCreateForwardBias( - dnnPrimitive_t *pInnerProduct, - dnnPrimitiveAttributes_t attributes, - size_t dimensions, - const size_t srcSize[], - size_t outputChannels) { - return dnnInnerProductCreateForwardBias_F64(pInnerProduct, - attributes, dimensions, - srcSize, outputChannels); -} - - -template inline dnnError_t dnnInnerProductCreateBackwardData( - dnnPrimitive_t *pInnerProduct, - dnnPrimitiveAttributes_t attributes, - size_t dimensions, - const size_t srcSize[], - size_t outputChannels); - -template <> inline dnnError_t dnnInnerProductCreateBackwardData( - dnnPrimitive_t *pInnerProduct, - dnnPrimitiveAttributes_t attributes, - size_t dimensions, - const size_t srcSize[], - size_t outputChannels) { - return dnnInnerProductCreateBackwardData_F32(pInnerProduct, - attributes, dimensions, - srcSize, outputChannels); -} -template <> inline dnnError_t dnnInnerProductCreateBackwardData( - dnnPrimitive_t *pInnerProduct, - dnnPrimitiveAttributes_t attributes, - size_t dimensions, - const size_t srcSize[], - size_t outputChannels) { - return dnnInnerProductCreateBackwardData_F64(pInnerProduct, - attributes, dimensions, - srcSize, outputChannels); -} - - - - -template inline dnnError_t dnnInnerProductCreateBackwardFilter( - dnnPrimitive_t *pInnerProduct, - dnnPrimitiveAttributes_t attributes, - size_t dimensions, - const size_t srcSize[], - size_t outputChannels); - -template <> inline dnnError_t dnnInnerProductCreateBackwardFilter( - dnnPrimitive_t *pInnerProduct, - dnnPrimitiveAttributes_t attributes, - size_t dimensions, - const size_t srcSize[], - size_t outputChannels) { - return dnnInnerProductCreateBackwardFilter_F32(pInnerProduct, - attributes, dimensions, - srcSize, outputChannels); -} -template <> inline dnnError_t dnnInnerProductCreateBackwardFilter( - dnnPrimitive_t *pInnerProduct, - dnnPrimitiveAttributes_t attributes, - size_t dimensions, - const size_t srcSize[], - size_t outputChannels) { - return dnnInnerProductCreateBackwardFilter_F64(pInnerProduct, - attributes, dimensions, - srcSize, outputChannels); -} - - - -template inline dnnError_t dnnInnerProductCreateBackwardBias( - dnnPrimitive_t *pInnerProduct, - dnnPrimitiveAttributes_t attributes, - size_t dimensions, - const size_t dstSize[]); - -template <> inline dnnError_t dnnInnerProductCreateBackwardBias( - dnnPrimitive_t *pInnerProduct, - dnnPrimitiveAttributes_t attributes, - size_t dimensions, - const size_t dstSize[]) { - return dnnInnerProductCreateBackwardBias_F32(pInnerProduct, - attributes, dimensions, - dstSize); -} -template <> inline dnnError_t dnnInnerProductCreateBackwardBias( - dnnPrimitive_t *pInnerProduct, - dnnPrimitiveAttributes_t attributes, - size_t dimensions, - const size_t dstSize[]) { - return dnnInnerProductCreateBackwardBias_F64(pInnerProduct, - attributes, dimensions, - dstSize); -} -#endif // #MXNET_USE_MKL2017 == 1 -#endif // MXNET_OPERATOR_MKL_MKL_CPPWRAPPER_H_ diff --git a/src/operator/mkl/mkl_elementwise_copy-inl.h b/src/operator/mkl/mkl_elementwise_copy-inl.h deleted file mode 100644 index 48c931291150..000000000000 --- a/src/operator/mkl/mkl_elementwise_copy-inl.h +++ /dev/null @@ -1,69 +0,0 @@ -/******************************************************************************* -* Copyright 2016 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -* -* \file mkl_elementwise-inl.h -* \brief -* \author lingyan.guo@intel.com -* zhenlin.luo@intel.com -* -*******************************************************************************/ -#ifndef MXNET_OPERATOR_MKL_MKL_ELEMENTWISE_COPY_INL_H_ -#define MXNET_OPERATOR_MKL_MKL_ELEMENTWISE_COPY_INL_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "../operator_common.h" -#include "../mshadow_op.h" -#include "./mkl_util-inl.h" - - -namespace mxnet { -namespace op { - -template -void MKLIdentityCompute(const nnvm::NodeAttrs& attrs, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { - if (!req[0]) return; -#if MKL_EXPERIMENTAL == 1 - if (op::mkl_prv_data(inputs[0])) { - std::shared_ptr in_data_mem = inputs[0].Mkl_mem_; - // User copy to avoid potential problem - std::shared_ptr > top_data = MKLData::create(); - std::shared_ptr top_mem = outputs[0].Mkl_mem_; - top_data->copy_from(in_data_mem); - top_mem->set_prv_descriptor(top_data); - return; - } -#endif - int in_blob_size = inputs[0].Size(); - int out_blob_size = outputs[0].Size(); - CHECK_EQ(in_blob_size, out_blob_size) << "MKLIdentityCompute CPU Size not Match "; - memcpy(outputs[0].dptr_, inputs[0].dptr_, in_blob_size * sizeof(DType)); -} - - - -} // namespace op -} // namespace mxnet -#endif // MXNET_OPERATOR_MKL_MKL_ELEMENTWISE_COPY_INL_H_ diff --git a/src/operator/mkl/mkl_elementwise_sum-inl.h b/src/operator/mkl/mkl_elementwise_sum-inl.h deleted file mode 100644 index d313fd15a5be..000000000000 --- a/src/operator/mkl/mkl_elementwise_sum-inl.h +++ /dev/null @@ -1,117 +0,0 @@ -/******************************************************************************* -* Copyright 2016 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -* -* \file mkl_elementwise-inl.h -* \brief -* \author lingyan.guo@intel.com -* zhenlin.luo@intel.com -* -*******************************************************************************/ -#ifndef MXNET_OPERATOR_MKL_MKL_ELEMENTWISE_SUM_INL_H_ -#define MXNET_OPERATOR_MKL_MKL_ELEMENTWISE_SUM_INL_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "../operator_common.h" -#include "../mshadow_op.h" -#include "./mkl_util-inl.h" - - -namespace mxnet { -namespace op { -template -static void LayerSetUp(const std::vector > &data, - size_t data_shape_size, - std::shared_ptr > fwd_top_data) { - // Whether to use an asymptotically slower (for >2 inputs) but stabler method - // of computing the gradient for the PROD operation. (No effect for SUM op.) - // stable_prod_grad_ = 1; - size_t dim_src = data_shape_size; - size_t *sizes_src = new size_t[dim_src]; - size_t *strides_src = new size_t[dim_src]; - for (size_t d = 0; d < dim_src; ++d) { - sizes_src[d] = data[0].shape_[dim_src - d - 1]; - strides_src[d] = (d == 0) ? 1 : strides_src[d - 1] * sizes_src[d - 1]; - } - - fwd_top_data->create_user_layout(dim_src, sizes_src, strides_src); - delete[] sizes_src; - delete[] strides_src; -} - -template -void MKLElementWiseSumCompute_(const nnvm::NodeAttrs& attrs, - const OpContext& ctx, - const std::vector& in_data, - const std::vector& req, - const std::vector& out_data) { - using namespace mshadow; - using namespace mshadow::expr; - if (req[0] == kNullOp) return; - size_t size = in_data.size(); - Stream *s = ctx.get_stream(); - std::vector > data(size); - Tensor out = out_data[0].FlatTo1D(s); - bool in_place_flag = false; - int in_place_idx = 0; - - for (size_t i = 0; i < size; ++i) { - data[i] = in_data[i].FlatTo1D(s); - if (data[i].dptr_ == out.dptr_) { - in_place_idx = i; - in_place_flag = true; - } - } - std::shared_ptr > fwd_top_data = MKLData::create(); - std::vector coeffs_ = std::vector(data.size(), 1); - LayerSetUp(data, 1, fwd_top_data); - - - dnnError_t e; - void *eltwise_res[dnnResourceNumber]; - dnnPrimitive_t sumPrimitive = NULL; - e = dnnSumCreate(&sumPrimitive, NULL, size, fwd_top_data->layout_usr, - &coeffs_[0]); - CHECK_EQ(e, E_SUCCESS); - - eltwise_res[dnnResourceDst] = reinterpret_cast(const_cast(out.dptr_)); - eltwise_res[dnnResourceMultipleSrc] = - reinterpret_cast(reinterpret_cast(in_data[in_place_idx].dptr_)); - for (size_t i = 1; i < size; ++i) { - if (i == in_place_idx) continue; - eltwise_res[dnnResourceMultipleSrc + i] = - reinterpret_cast(reinterpret_cast(in_data[i].dptr_)); - } - - e = dnnExecute(sumPrimitive, eltwise_res); - CHECK_EQ(e, E_SUCCESS); - - if (sumPrimitive != NULL) { - dnnDelete(sumPrimitive); - sumPrimitive = NULL; - } -} - - - -} // namespace op -} // namespace mxnet -#endif // MXNET_OPERATOR_MKL_MKL_ELEMENTWISE_SUM_INL_H_ diff --git a/src/operator/mkl/mkl_fully_connected-inl.h b/src/operator/mkl/mkl_fully_connected-inl.h deleted file mode 100644 index 5e296704b6dd..000000000000 --- a/src/operator/mkl/mkl_fully_connected-inl.h +++ /dev/null @@ -1,192 +0,0 @@ -/******************************************************************************* -* Copyright 2016 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -* -* \file mkl_fully_connected-inl.h -* \brief -* \author zhenlin.luo@intel.com -* lingyan.guo@intel.com -* -* -*******************************************************************************/ -#ifndef MXNET_OPERATOR_MKL_MKL_FULLY_CONNECTED_INL_H_ -#define MXNET_OPERATOR_MKL_MKL_FULLY_CONNECTED_INL_H_ -#include -#include -#include -#include "../activation-inl.h" -#include "./mkl_util-inl.h" - -namespace mxnet { -namespace op { - -template -class MKLFullyConnectedOp : public Operator { - public: - explicit MKLFullyConnectedOp(const FullyConnectedParam& p, - const std::vector& in_shapes, - const std::vector& out_shapes): - param_(p) { - LayerSetUp(in_shapes, out_shapes); - } - - ~MKLFullyConnectedOp() { - dnnDelete(fullyConnectedFwd); - dnnDelete(fullyConnectedBwdData); - dnnDelete(fullyConnectedBwdFilter); - dnnDelete(fullyConnectedBwdBias); - } - static std::string getName() { - return "MKLFullyConnectedOp"; - } - - private: - void LayerSetUp(const std::vector& in_shapes, - const std::vector& out_shapes) { - const TShape& ishape = in_shapes[fullc::kData]; - - const size_t dim = 4; - const size_t src_sizes[4] = {1, 1, ishape.ProdShape(1, ishape.ndim()), ishape[0]}; - const size_t dst_sizes[2] = {param_.num_hidden, ishape[0]}; - const size_t output_channels = param_.num_hidden; - - dnnPrimitiveAttributes_t attributes = NULL; - MKLDNN_CALL(dnnPrimitiveAttributesCreate(&attributes)); - if (!param_.no_bias) { - MKLDNN_CALL(dnnInnerProductCreateForwardBias( - &fullyConnectedFwd, - attributes, - dim, - src_sizes, - output_channels)); - } else { - MKLDNN_CALL(dnnInnerProductCreateForward( - &fullyConnectedFwd, - attributes, - dim, - src_sizes, - output_channels)); - } - MKLDNN_CALL(dnnInnerProductCreateBackwardData( - &fullyConnectedBwdData, - attributes, - dim, - src_sizes, - output_channels)); - MKLDNN_CALL(dnnInnerProductCreateBackwardFilter( - &fullyConnectedBwdFilter, - attributes, - dim, - src_sizes, - output_channels)); - if (!param_.no_bias) { - MKLDNN_CALL(dnnInnerProductCreateBackwardBias( - &fullyConnectedBwdBias, - attributes, - 2, - dst_sizes)); - } - // TODO(minjie): Shouldn't `attributes` be destroyed? - } - - - virtual void Forward(const OpContext &ctx, - const std::vector &in_data, - const std::vector &req, - const std::vector &out_data, - const std::vector &aux_args) { - using namespace mshadow; - using namespace mshadow::expr; - - void* res_fullyConnected[dnnResourceNumber]; - if (req[fullc::kOut] == kNullOp) return; - CHECK_EQ(req[fullc::kOut], kWriteTo); - CHECK_EQ(in_data.size(), param_.no_bias ? 2 : 3); - CHECK_EQ(out_data.size(), 1); - Stream *s = ctx.get_stream(); - - const TShape& ishape = in_data[fullc::kData].shape_; - const TShape& oshape = out_data[fullc::kOut].shape_; - - Tensor data; - Tensor out; - - Shape4(in_data[fullc::kData].shape_[0], in_data[fullc::kData].shape_[1], 1, 1); - - Shape<4> dshape = Shape4(ishape[0], ishape.ProdShape(1, ishape.ndim()), 1, 1); - Shape<4> odshape = Shape4(oshape[0], oshape.ProdShape(1, oshape.ndim()), 1, 1); - - data = in_data[fullc::kData].get_with_shape(dshape, s); - out = out_data[fullc::kOut].get_with_shape(odshape, s); - res_fullyConnected[dnnResourceSrc] = - reinterpret_cast(in_data[fullc::kData].dptr_); - res_fullyConnected[dnnResourceDst] = - reinterpret_cast(out_data[fullc::kOut].dptr_); - res_fullyConnected[dnnResourceFilter] = - reinterpret_cast(in_data[fullc::kWeight].dptr_); - if (!param_.no_bias) { - res_fullyConnected[dnnResourceBias] = reinterpret_cast(in_data[fullc::kBias].dptr_); - } - - MKLDNN_CALL(dnnExecute(fullyConnectedFwd, res_fullyConnected)); - } - - virtual void Backward(const OpContext &ctx, - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &req, - const std::vector &in_grad, - const std::vector &aux_args) { - using namespace mshadow; - using namespace mshadow::expr; - - void* res_fullyConnected[dnnResourceNumber]; - CHECK_EQ(out_grad.size(), 1); - const size_t expected = param_.no_bias ? 2 : 3; - CHECK(in_data.size() == expected && in_grad.size() == expected); - CHECK_EQ(req.size(), expected); - res_fullyConnected[dnnResourceSrc] = - reinterpret_cast(in_data[fullc::kData].dptr_); - res_fullyConnected[dnnResourceFilter] = - reinterpret_cast(in_data[fullc::kWeight].dptr_); - - res_fullyConnected[dnnResourceDiffDst] = - reinterpret_cast(out_grad[fullc::kOut].dptr_); - res_fullyConnected[dnnResourceDiffSrc] = - reinterpret_cast(in_grad[fullc::kData].dptr_); - res_fullyConnected[dnnResourceDiffFilter] = - reinterpret_cast(in_grad[fullc::kWeight].dptr_); - if (!param_.no_bias) { - res_fullyConnected[dnnResourceDiffBias] = - reinterpret_cast(in_grad[fullc::kBias].dptr_); - } - MKLDNN_CALL(dnnExecute(fullyConnectedBwdFilter, res_fullyConnected)); - if (!param_.no_bias) { - MKLDNN_CALL(dnnExecute(fullyConnectedBwdBias, res_fullyConnected)); - } - MKLDNN_CALL(dnnExecute(fullyConnectedBwdData, res_fullyConnected)); - } - - private: - dnnPrimitive_t fullyConnectedFwd{nullptr}; - dnnPrimitive_t fullyConnectedBwdData{nullptr}; - dnnPrimitive_t fullyConnectedBwdFilter{nullptr}; - dnnPrimitive_t fullyConnectedBwdBias{nullptr}; - const FullyConnectedParam param_; -}; // class MKLFullyConnectedOp -} // namespace op -} // namespace mxnet - -#endif // MXNET_OPERATOR_MKL_MKL_FULLY_CONNECTED_INL_H_ diff --git a/src/operator/mkl/mkl_lrn-inl.h b/src/operator/mkl/mkl_lrn-inl.h deleted file mode 100644 index 90dfad50fa62..000000000000 --- a/src/operator/mkl/mkl_lrn-inl.h +++ /dev/null @@ -1,265 +0,0 @@ -/******************************************************************************* -* Copyright 2016 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -* -* \file mkl_lrn-inl.h -* \brief -* \author zhenlin.luo@intel.com -* lingyan.guo@intel.com -* -*******************************************************************************/ -#ifndef MXNET_OPERATOR_MKL_MKL_LRN_INL_H_ -#define MXNET_OPERATOR_MKL_MKL_LRN_INL_H_ -#include -#include -#include -#include -#include -#include -#include -#include "../operator_common.h" -#include "../mshadow_op.h" -#include "./mkl_util-inl.h" - -namespace mxnet { -namespace op { - -template -class MKLLRNOp : public Operator { - public: - static std::string getName() { - return "MKLLRNOp"; - } - - explicit MKLLRNOp(LRNParam param) : - lrnFwd(static_cast(NULL)), - lrnBwd(static_cast(NULL)), - lrn_buffer_(NULL) { - this->param_ = param; - fwd_top_data_ = MKLData::create(); - fwd_bottom_data_ = MKLData::create(); - bwd_top_diff_ = MKLData::create(); - bwd_bottom_diff_ = MKLData::create(); - init_mkldnn_ = false; - } - - virtual ~MKLLRNOp() { - if (lrnFwd != NULL) { - dnnDelete(lrnFwd); - lrnFwd = NULL; - } - if (lrnBwd != NULL) { - dnnDelete(lrnBwd); - lrnBwd = NULL; - } - dnnReleaseBuffer(lrn_buffer_); - } - - private: - void LayerSetup(const mshadow::Tensor &data, - const mshadow::Tensor &out) { - size_ = param_.nsize; - CHECK_EQ(size_ % 2, 1) << "LRN only supports odd values for local size"; - - alpha_ = param_.alpha; - beta_ = param_.beta; - k_ = param_.knorm; - size_t dim = 4, sizes[4], strides[4]; - channels_ = data.shape_[1]; - height_ = data.shape_[2]; - width_ = data.shape_[3]; - num_ = data.shape_[0]; - sizes[0] = width_; - sizes[1] = height_; - sizes[2] = channels_; - sizes[3] = num_; - - strides[0] = 1; - strides[1] = sizes[0]; - strides[2] = sizes[0] * sizes[1]; - strides[3] = sizes[0] * sizes[1] * sizes[2]; - - fwd_bottom_data_->name = "fwd_bottom_data_ @ " + getName(); - fwd_top_data_->name = "fwd_top_data_ @ " + getName(); - bwd_top_diff_->name = "bwd_top_diff_ @ " + getName(); - bwd_bottom_diff_->name = "bwd_bottom_diff_ @ " + getName(); - - fwd_bottom_data_->create_user_layout(dim, sizes, strides); - fwd_top_data_->create_user_layout(dim, sizes, strides); - bwd_bottom_diff_->create_user_layout(dim, sizes, strides); - bwd_top_diff_->create_user_layout(dim, sizes, strides); - } - - public: - virtual void Forward(const OpContext &ctx, - const std::vector &in_data, - const std::vector &req, - const std::vector &out_data, - const std::vector &aux_states) { - using namespace mshadow; - using namespace mshadow::expr; - CHECK_EQ(in_data.size(), 1U); - CHECK_EQ(out_data.size(), 2U); - CHECK_EQ(param_.nsize % 2, 1U) << "LRN only supports odd values for local_size"; - Stream *s = ctx.get_stream(); - Tensor data = mkl_experimental_direct_get( - in_data[lrn_enum::kData], s); - Tensor out = mkl_experimental_direct_get( - out_data[lrn_enum::kOut], s); - if (!init_mkldnn_) { - LayerSetup(data, out); - init_mkldnn_ = true; - } - - const void* bottom_data = NULL; -#if MKL_EXPERIMENTAL == 1 - bottom_data = - reinterpret_cast(mkl_prv_data(in_data[lrn_enum::kData])); -#endif -#if MKL_EXPERIMENTAL == 1 - if (NULL != bottom_data) { - if (lrnFwd == NULL) { - std::shared_ptr bottom_data_mem = - in_data[lrn_enum::kData].Mkl_mem_; - std::shared_ptr bottom_prv_descriptor = - bottom_data_mem->get_prv_descriptor(); - CHECK_EQ(bottom_prv_descriptor->get_descr_type(), - PrvMemDescr::PRV_DESCR_MKL2017); - std::shared_ptr > mem_descr - = std::static_pointer_cast>(bottom_prv_descriptor); - CHECK(mem_descr != nullptr); - fwd_bottom_data_ = mem_descr; - - dnnError_t e; - dnnLayout_t lrn_buffer_l = NULL; - - e = dnnLRNCreateForward(&lrnFwd, NULL, fwd_bottom_data_->layout_int, - size_, alpha_, beta_, k_); - CHECK_EQ(e, E_SUCCESS); - - fwd_top_data_->create_internal_layout(lrnFwd, dnnResourceDst); - - e = dnnLRNCreateBackward(&lrnBwd, NULL, - fwd_bottom_data_->layout_int, fwd_bottom_data_->layout_int, - size_, alpha_, beta_, k_); - CHECK_EQ(e, E_SUCCESS); - - e = dnnLayoutCreateFromPrimitive( - &lrn_buffer_l, lrnFwd, dnnResourceWorkspace); - CHECK_EQ(e, E_SUCCESS); - e = dnnAllocateBuffer( - reinterpret_cast(&lrn_buffer_), lrn_buffer_l); - CHECK_EQ(e, E_SUCCESS); - dnnLayoutDelete(lrn_buffer_l); - - bwd_top_diff_->create_internal_layout(lrnBwd, dnnResourceDiffDst); - bwd_bottom_diff_->create_internal_layout(lrnBwd, dnnResourceDiffSrc); - } - } -#endif - if (bottom_data == NULL) { - if (lrnFwd == NULL) { - dnnError_t e; - dnnLayout_t lrn_buffer_l = NULL; - e = dnnLRNCreateForward(&lrnFwd, NULL, fwd_bottom_data_->layout_usr, - size_, alpha_, beta_, k_); - CHECK_EQ(e, E_SUCCESS); - - e = dnnLayoutCreateFromPrimitive( - &lrn_buffer_l, lrnFwd, dnnResourceWorkspace); - CHECK_EQ(e, E_SUCCESS); - e = dnnAllocateBuffer( - reinterpret_cast(&lrn_buffer_), lrn_buffer_l); - CHECK_EQ(e, E_SUCCESS); - dnnLayoutDelete(lrn_buffer_l); - - e = dnnLRNCreateBackward(&lrnBwd, NULL, - fwd_bottom_data_->layout_usr, fwd_bottom_data_->layout_usr, - size_, alpha_, beta_, k_); - CHECK_EQ(e, E_SUCCESS); - } - bottom_data = data.dptr_; - } - - dnnError_t e; - void* lrn_res[dnnResourceNumber]; - lrn_res[dnnResourceSrc] = const_cast(bottom_data); - - lrn_res[dnnResourceDst] = fwd_top_data_->get_output_ptr( - out.dptr_, fwd_top_data_, out_data[lrn_enum::kOut]); - lrn_res[dnnResourceWorkspace] = lrn_buffer_; - e = dnnExecute(lrnFwd, lrn_res); - CHECK_EQ(e, E_SUCCESS); - } - - virtual void Backward(const OpContext &ctx, - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &req, - const std::vector &in_grad, - const std::vector &aux_states) { - using namespace mshadow; - using namespace mshadow::expr; - CHECK_EQ(out_grad.size(), 1); - CHECK_EQ(in_data.size(), 1); - CHECK_EQ(out_data.size(), 2); - Stream *s = ctx.get_stream(); - Tensor grad = mkl_experimental_direct_get( - out_grad[lrn_enum::kOut], s); - Tensor data = mkl_experimental_direct_get( - in_data[lrn_enum::kData], s); - Tensor grad_in = mkl_experimental_direct_get( - in_grad[lrn_enum::kData], s); - dnnError_t e; - void* lrn_res[dnnResourceNumber]; - lrn_res[dnnResourceDiffDst] = - bwd_top_diff_->get_converted_prv(grad.dptr_, true, out_grad[lrn_enum::kOut]); - lrn_res[dnnResourceWorkspace] = lrn_buffer_; - lrn_res[dnnResourceSrc] = - fwd_bottom_data_->get_converted_prv(data.dptr_, false, in_data[lrn_enum::kData]); - - lrn_res[dnnResourceDiffSrc] = bwd_bottom_diff_->get_output_ptr( - grad_in.dptr_, bwd_bottom_diff_, in_grad[lrn_enum::kData]); - e = dnnExecute(lrnBwd, lrn_res); - CHECK_EQ(e, E_SUCCESS); - } - - private: - LRNParam param_; - int size_; - int pre_pad_; - DType alpha_; - DType beta_; - DType k_; - int num_; - int channels_; - int height_; - int width_; - bool init_mkldnn_; - - private: - dnnPrimitive_t lrnFwd, lrnBwd; - std::shared_ptr > fwd_top_data_; - std::shared_ptr > fwd_bottom_data_; - - std::shared_ptr > bwd_top_diff_; - std::shared_ptr > bwd_bottom_diff_; - - DType *lrn_buffer_; -}; // class LocalResponseNormOp -} // namespace op -} // namespace mxnet -#endif // MXNET_OPERATOR_MKL_MKL_LRN_INL_H_ - diff --git a/src/operator/mkl/mkl_memory-inl.h b/src/operator/mkl/mkl_memory-inl.h deleted file mode 100644 index 71af10254b2a..000000000000 --- a/src/operator/mkl/mkl_memory-inl.h +++ /dev/null @@ -1,137 +0,0 @@ -/******************************************************************************* -* Copyright 2016 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -* -* \file mkl_memory-inl.h -* \brief -* \author lingyan.guo@intel.com -* zhenlin.luo@intel.com -* -*******************************************************************************/ -#ifndef MXNET_OPERATOR_MKL_MKL_MEMORY_INL_H_ -#define MXNET_OPERATOR_MKL_MKL_MEMORY_INL_H_ - - -#include -#include -#include -#include "mkl_cppwrapper.h" - -namespace mxnet { - -template -struct MKLMemoryDescriptorBase : public PrvMemDescr, - public std::enable_shared_from_this > { - MKLMemoryDescriptorBase() : layout_usr(NULL), layout_int(NULL), - convert_to_int(NULL), convert_from_int(NULL), convert_prv2prv(NULL), - name("UNKNOWN"), internal_ptr(NULL) {} - virtual ~MKLMemoryDescriptorBase() { - dnnLayoutDelete(layout_usr); - dnnLayoutDelete(layout_int); - if (internal_ptr != NULL) { - dnnReleaseBuffer(internal_ptr); - internal_ptr = NULL; - } - if (convert_to_int != NULL) { - dnnDelete(convert_to_int); - convert_to_int = NULL; - } - if (convert_from_int != NULL) { - dnnDelete(convert_from_int); - convert_from_int = NULL; - } - if (convert_prv2prv != NULL) { - dnnDelete(convert_prv2prv); - convert_prv2prv = NULL; - } - } - std::shared_ptr > get_shared_ptr() { - return this->shared_from_this(); - } - - dnnLayout_t layout_usr; - dnnLayout_t layout_int; - dnnPrimitive_t convert_to_int; - dnnPrimitive_t convert_from_int; - dnnPrimitive_t convert_prv2prv; - std::shared_ptr > descr_prv2prv_conversion; - - - std::string name; // for debugging purposes - void allocate() { - if (internal_ptr == NULL) { - int status = dnnAllocateBuffer( - reinterpret_cast(&internal_ptr), layout_int); - CHECK_EQ(status, E_SUCCESS) - << "Failed internal_ptr memory allocation with status " - << status << "\n"; - } - } - virtual void* prv_ptr(bool allocate_when_uninit = true) { - if (internal_ptr == NULL && allocate_when_uninit) - allocate(); - return internal_ptr; - } - inline bool conversion_needed() { - return (convert_to_int != NULL); - } - void create_conversions(); - void create_internal_layout(const dnnPrimitive_t primitive, - dnnResourceType_t type); - void create_user_layout(size_t dimension, const size_t size[], - const size_t strides[]); - void create_layouts( - const dnnPrimitive_t primitive, dnnResourceType_t type, - size_t dimension, const size_t size[], const size_t strides[]); - - virtual PrvDescrType get_descr_type() { - return PRV_DESCR_MKL2017; - } - virtual size_t prv_size() { - return dnnLayoutGetMemorySize(layout_int); - } - virtual size_t prv_count() { - return dnnLayoutGetMemorySize(layout_int) / sizeof(DType); - } - virtual void convert_from_prv(void* cpu_ptr); - virtual void convert_to_prv(void* cpu_ptr); - virtual bool layout_compare(std::shared_ptr other); - virtual void convert_from_other(std::shared_ptr other); - protected: - DType* internal_ptr; -}; - -template -struct MKLMemoryDescriptor : MKLMemoryDescriptorBase { - // The last get_converted_prv() argument is a hack for reusing - // in backward a conversion done already in the forward direction. - DType* get_converted_prv(DType *data_ptr, bool set_prv_ptr, - const TBlob &blob); - void* get_output_ptr(DType *data_ptr, std::shared_ptr > self_ptr, - const TBlob &blob, bool in_place = false); - bool copy_from(std::shared_ptr dnn_chunk); - MKLMemoryDescriptor() {} -}; - -template struct MKLData : MKLMemoryDescriptor { - static std::shared_ptr > create() { - return std::make_shared >(); - } -}; - -template struct MKLData; -template struct MKLData; - -} // namespace mxnet -#endif // MXNET_OPERATOR_MKL_MKL_MEMORY_INL_H_ diff --git a/src/operator/mkl/mkl_memory.cc b/src/operator/mkl/mkl_memory.cc deleted file mode 100644 index 7682fe1c1f37..000000000000 --- a/src/operator/mkl/mkl_memory.cc +++ /dev/null @@ -1,291 +0,0 @@ -/******************************************************************************* -* Copyright 2016 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -* -* \file mkl_memory.cc -* \brief -* \author lingyan.guo@intel.com -* zhenlin.luo@intel.com -* -*******************************************************************************/ -#include "../operator_common.h" - -#if MXNET_USE_MKL2017 == 1 -#include -#include "mkl_memory-inl.h" -#include "mkl_util-inl.h" - -namespace mxnet { - -template -void MKLMemoryDescriptorBase::create_conversions() { - int status; - if (this->convert_from_int) { - status = dnnDelete(this->convert_from_int); - CHECK_EQ(status, E_SUCCESS); - this->convert_from_int = NULL; - } - if (this->convert_to_int) { - status = dnnDelete(this->convert_to_int); - CHECK_EQ(status, E_SUCCESS); - this->convert_to_int = NULL; - } - if (layout_int - && !dnnLayoutCompare(layout_usr, layout_int)) { - CHECK(layout_usr); - status = dnnConversionCreate(&convert_to_int, layout_usr, - layout_int); - CHECK_EQ(status, E_SUCCESS) - << "Failed creation convert_to_int with status " - << status << " for buffer: " << this->name << "\n"; - status = dnnConversionCreate(&convert_from_int, layout_int, - layout_usr); - CHECK_EQ(status, E_SUCCESS) - << "Failed creation convert_from_int with status " - << status << " for buffer: " << this->name << "\n"; - } -} - -template -void MKLMemoryDescriptorBase::create_internal_layout( - const dnnPrimitive_t primitive, dnnResourceType_t type) { - int status; - if (this->layout_int) { - status = dnnLayoutDelete(this->layout_int); - CHECK_EQ(status, E_SUCCESS); - } - status = dnnLayoutCreateFromPrimitive( - &this->layout_int, primitive, type); - CHECK_EQ(status, E_SUCCESS) - << "Failed dnnLayoutCreateFromPrimitive with status " - << status << " for buffer: " << this->name << "\n"; - - if (this->layout_usr) - this->create_conversions(); -} - -template -void MKLMemoryDescriptorBase::create_user_layout( - size_t dimension, const size_t size[], const size_t strides[]) { - int status; - if (this->layout_usr) { - status = dnnLayoutDelete(this->layout_usr); - CHECK_EQ(status, E_SUCCESS); - } - - status = dnnLayoutCreate( - &this->layout_usr, dimension, size, strides); - CHECK_EQ(status, E_SUCCESS) << "Failed dnnLayoutCreate with status " - << status << " for buffer: " << this->name << "\n"; - - if (this->layout_int) - this->create_conversions(); -} - -template -void MKLMemoryDescriptorBase::create_layouts( - const dnnPrimitive_t primitive, dnnResourceType_t type, - size_t dimension, const size_t size[], const size_t strides[]) { - this->create_internal_layout(primitive, type); - this->create_user_layout(dimension, size, strides); -} - - -template -void MKLMemoryDescriptorBase::convert_from_prv(void* cpu_ptr) { - CHECK(cpu_ptr); - CHECK(this->convert_from_int); - int status; - void *convert_resources[dnnResourceNumber]; - - convert_resources[dnnResourceFrom] = this->prv_ptr(); - convert_resources[dnnResourceTo] = cpu_ptr; - status = dnnExecute(this->convert_from_int, convert_resources); - CHECK_EQ(status, 0) << "Conversion from prv failed with status " << status; -} - -template -void MKLMemoryDescriptorBase::convert_to_prv(void* cpu_ptr) { - CHECK(cpu_ptr); - CHECK(this->convert_to_int); - int status; - void *convert_resources[dnnResourceNumber]; - - convert_resources[dnnResourceFrom] = cpu_ptr; - convert_resources[dnnResourceTo] = this->prv_ptr(); - status = dnnExecute(this->convert_to_int, convert_resources); - CHECK_EQ(status, 0) << "Conversion from prv failed with status " << status; -} - - -template -bool MKLMemoryDescriptorBase::layout_compare( - std::shared_ptr other) { - CHECK_EQ(other->get_descr_type(), - PrvMemDescr::PRV_DESCR_MKL2017); - std::shared_ptr >other_descr = - std::static_pointer_cast > - (other); - - if (dnnLayoutCompare(other_descr->layout_int, - this->layout_int)) - return true; - else - return false; -} - -template -void MKLMemoryDescriptorBase::convert_from_other( - std::shared_ptr other) { - std::shared_ptr > other_descr = - std::static_pointer_cast > - (other); - - int status; - dnnPrimitive_t convert; - status = dnnConversionCreate(&convert, - other_descr->layout_int, this->layout_int); - - void *convert_resources[dnnResourceNumber]; - convert_resources[dnnResourceFrom] = other_descr->prv_ptr(); - convert_resources[dnnResourceTo] = this->prv_ptr(); - status = dnnExecute(convert, convert_resources); - CHECK_EQ(status, 0) << "Conversion from other failed with status " - << status; - - dnnDelete(convert); -} - - -template -Dtype* MKLMemoryDescriptor::get_converted_prv( - Dtype *cpu_ptr, bool set_prv_ptr, const TBlob &blob) { - Dtype* prv_ptr = NULL; - std::shared_ptr dnn_chunk = NULL; -#if MKL_EXPERIMENTAL == 1 - dnn_chunk = blob.Mkl_mem_; -#endif -#if MKL_EXPERIMENTAL == 1 - if (dnn_chunk != NULL) - prv_ptr = static_cast(dnn_chunk->prv_data()); -#endif - - if (this->convert_to_int != NULL) { -#if MKL_EXPERIMENTAL == 1 - int status; - void *convert_resources[dnnResourceNumber]; -#endif - if (prv_ptr == NULL) { - this->allocate(); - this->convert_to_prv(cpu_ptr); -#if MKL_EXPERIMENTAL == 1 - if (set_prv_ptr) { - dnn_chunk->set_prv_descriptor(this->get_shared_ptr(), true); - } -#endif - return this->internal_ptr; - } -#if MKL_EXPERIMENTAL == 1 - if (prv_ptr != NULL) { - std::shared_ptr > current_descr = - op::mkl_get_mem_desc(dnn_chunk); - if (!dnnLayoutCompare(current_descr->layout_int, - this->layout_int)) { - if (this->convert_prv2prv) { - CHECK_EQ(dnnLayoutCompare( - this->descr_prv2prv_conversion->layout_int, - this->layout_int), 0); - status = 0; - } else { - status = dnnConversionCreate(&this->convert_prv2prv, - current_descr->layout_int, this->layout_int); - if (status == 0) - this->descr_prv2prv_conversion = current_descr; - } - if (status != 0) { - this->allocate(); - convert_resources[dnnResourceFrom] = cpu_ptr; - convert_resources[dnnResourceTo] = - reinterpret_cast(this->internal_ptr); - status = dnnExecute(this->convert_to_int, convert_resources); - CHECK_EQ(status, 0) << "Conversion failed with status " << status; - } else { - this->allocate(); - convert_resources[dnnResourceFrom] = reinterpret_cast(prv_ptr); - convert_resources[dnnResourceTo] = - reinterpret_cast(this->internal_ptr); - status = dnnExecute(this->convert_prv2prv, convert_resources); - CHECK_EQ(status, 0) << "Conversion failed with status " << status; - } - if (set_prv_ptr) { - dnn_chunk->set_prv_descriptor(this->get_shared_ptr(), true); - } - return this->internal_ptr; - } else if (current_descr.get() != this) { - // MKL_DLOG(INFO) << "layout OK " - // << current_descr->name << " == " << this->name; - } - } -#endif - return const_cast(prv_ptr); - } else { - if (prv_ptr != NULL) { -#if MKL_EXPERIMENTAL == 1 - std::shared_ptr > other_descr = - std::static_pointer_cast > - (dnn_chunk->prv_descriptor_); - dnn_chunk->check_and_prv_to_cpu(cpu_ptr); -#endif - // printf("get_converted_prv release %s\n", other_descr->name.c_str()); - } - } - return cpu_ptr; -} - -template -void* MKLMemoryDescriptor::get_output_ptr(Dtype *data_ptr, - std::shared_ptr > self_ptr, const TBlob &blob, bool in_place) { -#if MKL_EXPERIMENTAL == 1 - std::shared_ptr dnn_chunk = blob.Mkl_mem_; -#endif - if (this->conversion_needed()) { - void * prv_ptr = this->prv_ptr(); -#if MKL_EXPERIMENTAL == 1 - if (!in_place) { - dnn_chunk->set_prv_descriptor(self_ptr); - } else { - Dtype * blob_prv = op::mkl_prv_data(blob); - if (blob_prv != NULL) - return blob_prv; - } -#endif - return prv_ptr; - } else { -#if MKL_EXPERIMENTAL == 1 - std::shared_ptr > other_descr = - std::static_pointer_cast > - (dnn_chunk->prv_descriptor_); - dnn_chunk->check_and_prv_to_cpu(data_ptr); -#endif - return data_ptr; - } -} - -template class MKLMemoryDescriptor; -template class MKLMemoryDescriptor; - -template class MKLMemoryDescriptorBase; -template class MKLMemoryDescriptorBase; -} // namespace mxnet -#endif diff --git a/src/operator/mkl/mkl_memory.h b/src/operator/mkl/mkl_memory.h deleted file mode 100644 index 13f1fd27b12b..000000000000 --- a/src/operator/mkl/mkl_memory.h +++ /dev/null @@ -1,123 +0,0 @@ -/******************************************************************************* -* Copyright 2016 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -* -* \file mkl_memory.cc -* \brief -* \author lingyan.guo@intel.com -* zhenlin.luo@intel.com -* -*******************************************************************************/ -#ifndef MXNET_OPERATOR_MKL_MKL_MEMORY_H_ -#define MXNET_OPERATOR_MKL_MKL_MEMORY_H_ - -#include -#include -#include - - -namespace mxnet { -// Base class -struct PrvMemDescr { - virtual void convert_from_prv(void* cpu_ptr) = 0; - virtual void convert_to_prv(void* cpu_ptr) = 0; - virtual void convert_from_other(std::shared_ptr other) = 0; - virtual void* prv_ptr(bool allocate_when_uninit = true) = 0; - // returns true for matching layouts - virtual bool layout_compare(std::shared_ptr other) = 0; - virtual size_t prv_count() = 0; - virtual size_t prv_size() = 0; - // This might help using prv_ptr_ by different accelerators/engines - enum PrvDescrType { - PRV_DESCR_MKL2017, - PRV_DESCR_MKLDNN - }; - virtual PrvDescrType get_descr_type() = 0; -}; - -#if MKL_EXPERIMENTAL == 1 -// Currently HEAD_AT_PRV do not free CPU data -enum SyncedHead { - HEAD_AT_CPU, - HEAD_AT_PRV, -}; -struct MKLMemHolder { - SyncedHead head_; - std::shared_ptr prv_descriptor_; - bool b_disable_prv_2_cpu; - bool b_eager_mode; - void disable_prv_2_cpu(bool flag) { - b_disable_prv_2_cpu = flag; - } - void set_eager_mode(bool eager_mode) { - b_eager_mode = eager_mode; - } - void set_prv_descriptor(std::shared_ptr descriptor, bool same_data = false) { - head_ = HEAD_AT_PRV; - prv_descriptor_ = descriptor; - } - std::shared_ptr get_prv_descriptor() { - return prv_descriptor_; - } - bool head_at_prv() { - return (head_ == HEAD_AT_PRV) ? true : false; - } - void* prv_data(bool allocate_when_uninit = true) { - if (head_ != HEAD_AT_PRV) { - return NULL; - } - if (prv_descriptor_ == NULL) { - LOG(FATAL) << " prv_descriptor_ is NULL"; - } - CHECK(prv_descriptor_.get()); - return reinterpret_cast(prv_descriptor_->prv_ptr(allocate_when_uninit)); - } - - int prv_count() { - if (head_ != HEAD_AT_PRV) { - return 0; - } - if (prv_descriptor_ == NULL) { - LOG(FATAL) << " prv_descriptor_ is NULL"; - } - CHECK(prv_descriptor_.get()); - return prv_descriptor_->prv_count(); - } - static std::shared_ptr create() { - return std::make_shared(); - } - void check_and_prv_to_cpu(void *dptr_) { - if (!b_disable_prv_2_cpu && head_ == HEAD_AT_PRV) { - CHECK(prv_descriptor_ != nullptr); - prv_descriptor_->convert_from_prv(dptr_); - // Because operator use CPU & maybe change it, change to CPU Flag - head_ = HEAD_AT_CPU; - } - if (b_disable_prv_2_cpu) { - b_disable_prv_2_cpu = false; - } - } - MKLMemHolder() : - head_(HEAD_AT_CPU), prv_descriptor_(nullptr), - b_disable_prv_2_cpu(false), b_eager_mode(false) {} -}; -#else -struct MKLMemHolder { - public: - virtual std::shared_ptr get_prv_descriptor() = 0; -}; -#endif - -} // namespace mxnet -#endif // MXNET_OPERATOR_MKL_MKL_MEMORY_H_ diff --git a/src/operator/mkl/mkl_pooling-inl.h b/src/operator/mkl/mkl_pooling-inl.h deleted file mode 100644 index 5662a61aebd3..000000000000 --- a/src/operator/mkl/mkl_pooling-inl.h +++ /dev/null @@ -1,357 +0,0 @@ -/******************************************************************************* -* Copyright 2016 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -* -* \file mkl_pooling-inl.h -* \brief -* \author zhenlin.luo@intel.com -* lingyan.guo@intel.com -* -*******************************************************************************/ - -#ifndef MXNET_OPERATOR_MKL_MKL_POOLING_INL_H_ -#define MXNET_OPERATOR_MKL_MKL_POOLING_INL_H_ -#include -#include -#include -#include "../operator_common.h" -#include "../nn/pooling-inl.h" -#include "./mkl_util-inl.h" - -namespace mxnet { -namespace op { - - -template -class MKLPoolingOp : public Operator { - public: - static std::string getName() { - return "MKLPoolingOp"; - } - explicit MKLPoolingOp(PoolingParam p) { - poolingFwd = static_cast(NULL); - poolingBwd = static_cast(NULL); - max_idx_data = static_cast(NULL); - fwd_top_data = MKLData::create(); - fwd_bottom_data = MKLData::create(); - bwd_top_diff = MKLData::create(); - bwd_bottom_diff = MKLData::create(); - this->param_ = p; - init_mkldnn_ = false; - } - virtual ~MKLPoolingOp() { - if (poolingFwd != NULL) { - dnnDelete(poolingFwd); - poolingFwd = NULL; - } - if (poolingBwd != NULL) { - dnnDelete(poolingBwd); - poolingBwd = NULL; - } - if (max_idx_data != NULL) { - dnnReleaseBuffer(max_idx_data); - max_idx_data = NULL; - } - } - - private: - void LayerSetUp(const mshadow::Tensor &data, - const mshadow::Tensor &out) { - channels_ = data.shape_[1]; - height_ = data.shape_[2]; - width_ = data.shape_[3]; - num_ = data.shape_[0]; - global_pooling_ = param_.global_pool; - if (global_pooling_) { - kernel_h_ = height_; - kernel_w_ = width_; - } else { - kernel_h_ = param_.kernel[0]; - kernel_w_ = param_.kernel[1]; - } - CHECK_GT(kernel_h_, 0) << "Filter dimensions cannot be zero."; - CHECK_GT(kernel_w_, 0) << "Filter dimensions cannot be zero."; - pad_h_ = param_.pad[0]; - pad_w_ = param_.pad[1]; - if (global_pooling_) { - stride_h_ = stride_w_ = 1; - } else { - stride_h_ = param_.stride[0]; - stride_w_ = param_.stride[1]; - } - if (global_pooling_) { - CHECK(pad_h_ == 0 && pad_w_ == 0 && stride_h_ == 1 && stride_w_ == 1) - << "With Global_pooling: true; only pad = 0 and stride = 1"; - } - if (pad_h_ != 0 || pad_w_ != 0) { - CHECK(param_.pool_type == pool_enum::kAvgPooling - || param_.pool_type == pool_enum::kMaxPooling) - << "Padding implemented only for average and max pooling."; - CHECK_LT(pad_h_, kernel_h_); - CHECK_LT(pad_w_, kernel_w_); - } - pooled_height_ = out.shape_[2]; - pooled_width_ = out.shape_[3]; - - size_t dim = 4; - size_t src_sizes[4], src_strides[4]; - size_t dst_sizes[4], dst_strides[4]; - src_sizes[0] = width_; - src_sizes[1] = height_; - src_sizes[2] = channels_; - src_sizes[3] = num_; - src_strides[0] = 1; - src_strides[1] = src_sizes[0]; - src_strides[2] = src_sizes[0] * src_sizes[1]; - src_strides[3] = src_sizes[0] * src_sizes[1] * src_sizes[2]; - dst_sizes[0] = pooled_width_; - dst_sizes[1] = pooled_height_; - dst_sizes[2] = src_sizes[2]; - dst_sizes[3] = src_sizes[3]; - dst_strides[0] = 1; - dst_strides[1] = dst_sizes[0]; - dst_strides[2] = dst_sizes[0] * dst_sizes[1]; - dst_strides[3] = dst_sizes[0] * dst_sizes[1] * dst_sizes[2]; - src_offset[0] = -pad_w_; - src_offset[1] = -pad_h_; - src_offset[2] = -pad_w_; - src_offset[3] = -pad_h_; - kernel_stride[0] = stride_w_; - kernel_stride[1] = stride_h_; - kernel_size[0] = kernel_w_; - kernel_size[1] = kernel_h_; - - // Names are for debugging only - fwd_bottom_data->name = "fwd_bottom_data @ " + getName(); - fwd_top_data->name = "fwd_top_data @ " + getName(); - bwd_top_diff->name = "bwd_top_diff @ " + getName(); - bwd_bottom_diff->name = "bwd_bottom_diff @ " + getName(); - - fwd_bottom_data->create_user_layout(dim, src_sizes, src_strides); - fwd_top_data->create_user_layout(dim, dst_sizes, dst_strides); - bwd_bottom_diff->create_user_layout(dim, src_sizes, src_strides); - bwd_top_diff->create_user_layout(dim, dst_sizes, dst_strides); - - // Primitives will be allocated during the first fwd pass - poolingFwd = NULL; - poolingBwd = NULL; - max_idx_data = NULL; - } - - public: - virtual void Forward(const OpContext &ctx, - const std::vector &in_data, - const std::vector &req, - const std::vector &out_data, - const std::vector &aux_args) { - using namespace mshadow; - using namespace mshadow::expr; - CHECK_EQ(in_data.size(), 1); - CHECK_EQ(out_data.size(), 1); - Stream *s = ctx.get_stream(); - if (param_.kernel.ndim() >= 3) { - LOG(FATAL) << "Not implmented"; - } - Tensor data = mkl_experimental_direct_get( - in_data[pool_enum::kData], s); - Tensor out = mkl_experimental_direct_get( - out_data[pool_enum::kOut], s); - if (!init_mkldnn_) { - LayerSetUp(data, out); - init_mkldnn_ = true; - } - auto first_pass = false; - if (poolingFwd == NULL) first_pass = true; - - dnnAlgorithm_t algorithm = dnnAlgorithmPoolingMax; - - switch (param_.pool_type) { - case pool_enum::kMaxPooling: - algorithm = dnnAlgorithmPoolingMax; - break; - case pool_enum::kAvgPooling: - algorithm = dnnAlgorithmPoolingAvgIncludePadding; - - break; - default: - LOG(FATAL) << "Unknown pooling method."; - } - - dnnError_t status; - void* pooling_res[dnnResourceNumber]; - - void* bottom_data = NULL; -#if MKL_EXPERIMENTAL == 1 - bottom_data = - reinterpret_cast(mkl_prv_data(in_data[pool_enum::kData])); -#endif - dnnBorder_t border_type = dnnBorderZerosAsymm; - switch (param_.pooling_convention) { - case pool_enum::kFull: - border_type = dnnBorderZeros; - break; - case pool_enum::kValid: - border_type = dnnBorderZerosAsymm; - break; - default: - border_type = dnnBorderZerosAsymm; - break; - } - if (NULL == bottom_data) { - bottom_data = data.dptr_; - if (NULL == poolingFwd) { - status = dnnPoolingCreateForward(&poolingFwd, NULL, - algorithm, fwd_bottom_data->layout_usr, - kernel_size, kernel_stride, - src_offset, border_type); - CHECK_EQ(status, E_SUCCESS); - // Now create poolingBwd - status = dnnPoolingCreateBackward(&poolingBwd, NULL, - algorithm, fwd_bottom_data->layout_usr, - kernel_size, kernel_stride, - src_offset, border_type); - CHECK_EQ(status, E_SUCCESS); - } - } -#if MKL_EXPERIMENTAL == 1 - if (NULL != bottom_data) { - if (NULL == poolingFwd) { - std::shared_ptr bottom_data_mem = in_data[pool_enum::kData].Mkl_mem_; - std::shared_ptr bottom_prv_descriptor = - bottom_data_mem->get_prv_descriptor(); - CHECK_EQ(bottom_prv_descriptor->get_descr_type(), - PrvMemDescr::PRV_DESCR_MKL2017); - std::shared_ptr > mem_descr - = std::static_pointer_cast>(bottom_prv_descriptor); - CHECK(mem_descr != nullptr); - fwd_bottom_data = mem_descr; - - status = dnnPoolingCreateForward(&poolingFwd, NULL, - algorithm, fwd_bottom_data->layout_int, - kernel_size, kernel_stride, - src_offset, border_type); - CHECK_EQ(status, E_SUCCESS); - fwd_top_data->create_internal_layout(poolingFwd, dnnResourceDst); - - // Now create poolingBwd - status = dnnPoolingCreateBackward(&poolingBwd, NULL, - algorithm, fwd_bottom_data->layout_int, - kernel_size, kernel_stride, - src_offset, border_type); - CHECK_EQ(status, E_SUCCESS); - bwd_top_diff->create_internal_layout(poolingFwd, dnnResourceDst); - bwd_bottom_diff->create_internal_layout(poolingFwd, dnnResourceSrc); - } - } -#endif - - if (first_pass) { - dnnLayout_t max_idx_datal = NULL; - status = dnnLayoutCreateFromPrimitive( - &max_idx_datal, poolingFwd, dnnResourceWorkspace); - CHECK_EQ(status, E_SUCCESS); - status = dnnAllocateBuffer(reinterpret_cast(&max_idx_data), max_idx_datal); - CHECK_EQ(status, E_SUCCESS); -#if MKL_EXPERIMENTAL == 0 - fwd_bottom_data->create_internal_layout(poolingFwd, dnnResourceSrc); - fwd_top_data->create_internal_layout(poolingFwd, dnnResourceDst); - bwd_top_diff->create_internal_layout(poolingBwd, dnnResourceDiffDst); - bwd_bottom_diff->create_internal_layout(poolingBwd, dnnResourceDiffSrc); -#endif - dnnLayoutDelete(max_idx_datal); - first_pass = false; - } - pooling_res[dnnResourceSrc] = bottom_data; - pooling_res[dnnResourceWorkspace] = max_idx_data; - - pooling_res[dnnResourceDst] = fwd_top_data->get_output_ptr( - out.dptr_, fwd_top_data, out_data[pool_enum::kOut]); - status = dnnExecute(poolingFwd, pooling_res); - CHECK_EQ(status, E_SUCCESS); -#if MKL_EXPERIMENTAL == 0 - if (fwd_top_data->conversion_needed()) { - fwd_top_data->convert_from_prv(out.dptr_); - } -#endif - } - virtual void Backward(const OpContext &ctx, - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &req, - const std::vector &in_grad, - const std::vector &aux_args) { - if (!req[0]) { - return; - } - using namespace mshadow; - using namespace mshadow::expr; - CHECK_EQ(out_grad.size(), 1); - CHECK_EQ(in_data.size(), 1); - CHECK_EQ(out_data.size(), 1); - CHECK_EQ(req.size(), 1); - CHECK_EQ(in_grad.size(), 1); - if (param_.kernel.ndim() >= 3) { - LOG(FATAL) << "Not implmented"; - } - Stream *s = ctx.get_stream(); - Tensor grad = mkl_experimental_direct_get( - out_grad[pool_enum::kOut], s); - Tensor input_grad = mkl_experimental_direct_get( - in_grad[pool_enum::kData], s); - dnnError_t e; - void* pooling_res[dnnResourceNumber]; - pooling_res[dnnResourceWorkspace] = reinterpret_cast(max_idx_data); - - pooling_res[dnnResourceDiffDst] = - bwd_top_diff->get_converted_prv(grad.dptr_, true, out_grad[pool_enum::kOut]); - - pooling_res[dnnResourceDiffSrc] = bwd_bottom_diff->get_output_ptr( - input_grad.dptr_, bwd_bottom_diff, in_grad[pool_enum::kData]); - e = dnnExecute(poolingBwd, pooling_res); - CHECK_EQ(e, E_SUCCESS); -#if MKL_EXPERIMENTAL == 0 - if (bwd_bottom_diff->conversion_needed()) { - bwd_bottom_diff->convert_from_prv(input_grad.dptr_); - } -#endif - } - - private: - PoolingParam param_; - int kernel_h_, kernel_w_; - int stride_h_, stride_w_; - int pad_h_, pad_w_; - int channels_, num_; - int height_, width_; - int pooled_height_, pooled_width_; - bool global_pooling_; - - private: - size_t kernel_size[2], - kernel_stride[4]; - int src_offset[4]; // 2*(dimension-2) - dnnPrimitive_t poolingFwd, poolingBwd; - DType *max_idx_data; - - std::shared_ptr > fwd_top_data; - std::shared_ptr > fwd_bottom_data; - std::shared_ptr > bwd_top_diff; - std::shared_ptr > bwd_bottom_diff; - bool init_mkldnn_; -}; // class MKLPoolingOp -} // namespace op -} // namespace mxnet - -#endif // MXNET_OPERATOR_MKL_MKL_POOLING_INL_H_ diff --git a/src/operator/mkl/mkl_relu-inl.h b/src/operator/mkl/mkl_relu-inl.h deleted file mode 100644 index 8d7ab5e1e2db..000000000000 --- a/src/operator/mkl/mkl_relu-inl.h +++ /dev/null @@ -1,272 +0,0 @@ -/******************************************************************************* -* Copyright 2016 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -* -* \file mkl_relu-inl.h -* \brief -* \author zhenlin.luo@intel.com -* lingyan.guo@intel.com -* -*******************************************************************************/ -#ifndef MXNET_OPERATOR_MKL_MKL_RELU_INL_H_ -#define MXNET_OPERATOR_MKL_MKL_RELU_INL_H_ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include "../operator_common.h" -#include "./mkl_util-inl.h" - -namespace mxnet { -namespace op { - -template -class MKLReluOp : public Operator { - public: - static std::string getName() { - return "MKLReluOp"; - } - MKLReluOp(): - reluFwd_(NULL), - reluBwd_(NULL) { - init_mkldnn_ = false; - fwd_top_data_ = MKLData::create(); - fwd_bottom_data_ = MKLData::create(); - bwd_top_diff_ = MKLData::create(); - bwd_bottom_diff_ = MKLData::create(); - } - - ~MKLReluOp() { - if (reluFwd_ != NULL) { - dnnDelete(reluFwd_); - reluFwd_ = NULL; - } - if (reluBwd_ != NULL) { - dnnDelete(reluBwd_); - reluBwd_ = NULL; - } - } - - private: - void LayerSetUp(const mshadow::Tensor &data, - const mshadow::Tensor &out) { - size_t dim = 4; - size_t *sizes = new size_t[dim]; - size_t *strides = new size_t[dim]; - for (size_t d = 0; d < dim; ++d) { - (sizes)[d] = data.shape_[dim - 1 - d]; - (strides)[d] = (d == 0) ? 1 : (strides)[d - 1] * (sizes)[d - 1]; - } - // Names are for debugging only - fwd_bottom_data_->name = "fwd_bottom_data @ " + getName(); - fwd_top_data_->name = "fwd_top_data @ " + getName(); - bwd_bottom_diff_->name = "bwd_bottom_diff @ " + getName(); - bwd_top_diff_->name = "bwd_top_diff @ " + getName(); - fwd_bottom_data_->create_user_layout(dim, (sizes), (strides)); - fwd_top_data_->create_user_layout(dim, (sizes), (strides)); - bwd_bottom_diff_->create_user_layout(dim, (sizes), (strides)); - bwd_top_diff_->create_user_layout(dim, (sizes), (strides)); - delete[] sizes; - delete[] strides; - } - - public: - virtual void Forward(const OpContext &ctx, - const std::vector &in_data, - const std::vector &req, - const std::vector &out_data, - const std::vector &aux_args) { - using namespace mshadow; - using namespace mshadow::expr; - CHECK_EQ(in_data.size(), 1); - CHECK_EQ(out_data.size(), 1); - Stream *s = ctx.get_stream(); - Tensor data; - Tensor out; - if (in_data[activation::kData].ndim() == 1) { - Shape<4> dshape = Shape4(in_data[activation::kData].shape_[0], 1, 1, 1); - data = mkl_experimental_direct_get_with_shape( - in_data[activation::kData], dshape, s); - out = mkl_experimental_direct_get_with_shape( - out_data[activation::kOut], dshape, s); - } else if (in_data[activation::kData].ndim() == 2) { - Shape<4> dshape = Shape4(in_data[activation::kData].shape_[0], - in_data[activation::kData].shape_[1], 1, 1); - data = mkl_experimental_direct_get_with_shape( - in_data[activation::kData], dshape, s); - out = mkl_experimental_direct_get_with_shape( - out_data[activation::kOut], dshape, s); - } else if (in_data[activation::kData].ndim() == 3) { - Shape<4> dshape = Shape4(in_data[activation::kData].shape_[0], - in_data[activation::kData].shape_[1], - in_data[activation::kData].shape_[2], 1); - data = mkl_experimental_direct_get_with_shape( - in_data[activation::kData], dshape, s); - out = mkl_experimental_direct_get_with_shape( - out_data[activation::kOut], dshape, s); - } else { - data = mkl_experimental_direct_get(in_data[activation::kData], s); - out = mkl_experimental_direct_get(out_data[activation::kOut], s); - } - if (!init_mkldnn_) { - LayerSetUp(data, out); - init_mkldnn_ = true; - } - void* bottom_data = NULL; -#if MKL_EXPERIMENTAL == 1 - bottom_data = - reinterpret_cast(mkl_prv_data(in_data[activation::kData])); -#endif -#if MKL_EXPERIMENTAL == 1 - if (bottom_data != NULL) { - if (reluFwd_ == NULL) { - std::shared_ptr > mem_descr = - mkl_get_mem_desc(in_data[activation::kData].Mkl_mem_); - DType negative_slope = 0; - dnnError_t e; - e = dnnReLUCreateForward(&reluFwd_, NULL, mem_descr->layout_int, - negative_slope); - CHECK_EQ(e, E_SUCCESS); - e = dnnReLUCreateBackward(&reluBwd_, NULL, mem_descr->layout_int, - mem_descr->layout_int, negative_slope); - CHECK_EQ(e, E_SUCCESS); - - fwd_bottom_data_ = mem_descr; - fwd_top_data_->create_internal_layout(reluFwd_, dnnResourceDst); - bwd_top_diff_->create_internal_layout(reluFwd_, dnnResourceDst); - bwd_bottom_diff_->create_internal_layout(reluFwd_, dnnResourceSrc); - } - } -#endif - if (bottom_data == NULL) { - bottom_data = data.dptr_; - if (reluFwd_ == NULL) { - dnnError_t e; - DType negative_slope = 0; - e = dnnReLUCreateForward(&reluFwd_, NULL, - fwd_bottom_data_->layout_usr, negative_slope); - CHECK_EQ(e, E_SUCCESS); - e = dnnReLUCreateBackward(&reluBwd_, NULL, - fwd_bottom_data_->layout_usr, fwd_bottom_data_->layout_usr, - negative_slope); - CHECK_EQ(e, E_SUCCESS); - } - } - dnnError_t e; - void* relu_res[dnnResourceNumber]; - relu_res[dnnResourceSrc] = bottom_data; - - relu_res[dnnResourceDst] = fwd_top_data_->get_output_ptr( - out.dptr_, fwd_top_data_, out_data[activation::kOut], (data.dptr_ == out.dptr_)); - e = dnnExecute(reluFwd_, relu_res); - CHECK_EQ(e, E_SUCCESS); -#if MKL_EXPERIMENTAL == 0 - if (fwd_top_data_->conversion_needed()) { - fwd_top_data_->convert_from_prv(out.dptr_); - } -#endif - } - virtual void Backward(const OpContext &ctx, - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &req, - const std::vector &in_grad, - const std::vector &aux_args) { - if (!req[0]) { - return; - } - using namespace mshadow; - using namespace mshadow::expr; - CHECK_EQ(out_grad.size(), 1); - CHECK(in_data.size() == 1 && in_grad.size() == 1); - CHECK_EQ(req.size(), 1); - Stream *s = ctx.get_stream(); - Tensor m_out_grad; - Tensor m_out_data; - Tensor m_in_grad; - - if (out_grad[activation::kOut].ndim() == 1) { - Shape<4> dshape = Shape4(out_grad[activation::kOut].shape_[0], 1, 1, 1); - m_out_grad = mkl_experimental_direct_get_with_shape( - out_grad[activation::kOut], dshape, s); - m_out_data = mkl_experimental_direct_get_with_shape( - out_data[activation::kOut], dshape, s); - m_in_grad = mkl_experimental_direct_get_with_shape( - in_grad[activation::kData], dshape, s); - } else if (out_grad[activation::kOut].ndim() == 2) { - Shape<4> dshape = Shape4(out_grad[activation::kOut].shape_[0], - out_grad[activation::kOut].shape_[1], 1, 1); - m_out_grad = mkl_experimental_direct_get_with_shape( - out_grad[activation::kOut], dshape, s); - m_out_data = mkl_experimental_direct_get_with_shape( - out_data[activation::kOut], dshape, s); - m_in_grad = mkl_experimental_direct_get_with_shape( - in_grad[activation::kData], dshape, s); - } else if (out_grad[activation::kOut].ndim() == 3) { - Shape<4> dshape = Shape4(out_grad[activation::kOut].shape_[0], - out_grad[activation::kOut].shape_[1], - out_grad[activation::kOut].shape_[2], 1); - m_out_grad = mkl_experimental_direct_get_with_shape( - out_grad[activation::kOut], dshape, s); - m_out_data = mkl_experimental_direct_get_with_shape( - out_data[activation::kOut], dshape, s); - m_in_grad = mkl_experimental_direct_get_with_shape( - in_grad[activation::kData], dshape, s); - } else { - m_out_grad = mkl_experimental_direct_get(out_grad[activation::kOut], s); - m_out_data = mkl_experimental_direct_get(out_data[activation::kOut], s); - m_in_grad = mkl_experimental_direct_get(in_grad[activation::kData], s); - } - dnnError_t e; - void* relu_res[dnnResourceNumber]; - - void* bottom_data = NULL; -#if MKL_EXPERIMENTAL == 1 - bottom_data = reinterpret_cast(mkl_prv_data(out_data[activation::kOut])); -#endif - if (NULL == bottom_data) { - bottom_data = reinterpret_cast(const_cast(m_out_data.dptr_)); - } - relu_res[dnnResourceSrc] = bottom_data; - relu_res[dnnResourceDiffDst] = bwd_top_diff_->get_converted_prv(m_out_grad.dptr_, - true, out_grad[activation::kOut]); - relu_res[dnnResourceDiffSrc] = bwd_bottom_diff_->get_output_ptr( - m_in_grad.dptr_, bwd_bottom_diff_, in_grad[activation::kData]); - e = dnnExecute(reluBwd_, relu_res); - CHECK_EQ(e, E_SUCCESS); -#if MKL_EXPERIMENTAL == 0 - if (bwd_bottom_diff_->conversion_needed()) { - bwd_bottom_diff_->convert_from_prv(m_in_grad.dptr_); - } -#endif - } - - private: - bool init_mkldnn_; - std::shared_ptr > fwd_top_data_; - std::shared_ptr > fwd_bottom_data_; - std::shared_ptr > bwd_top_diff_; - std::shared_ptr > bwd_bottom_diff_; - dnnPrimitive_t reluFwd_, reluBwd_; -}; // class MKLReluOp -} // namespace op -} // namespace mxnet -#endif // MXNET_OPERATOR_MKL_MKL_RELU_INL_H_ diff --git a/src/operator/mkl/mkl_util-inl.h b/src/operator/mkl/mkl_util-inl.h deleted file mode 100644 index 4ad786a2ce93..000000000000 --- a/src/operator/mkl/mkl_util-inl.h +++ /dev/null @@ -1,110 +0,0 @@ -/******************************************************************************* -* Copyright 2016 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -* -* \file mkl_util-inl.h -* \brief -* \author lingyan.guo@intel.com -* zhenlin.luo@intel.com -* -*******************************************************************************/ -#ifndef MXNET_OPERATOR_MKL_MKL_UTIL_INL_H_ -#define MXNET_OPERATOR_MKL_MKL_UTIL_INL_H_ -#include -#define MKLDNN_CALL(func) \ - { \ - dnnError_t status = (func); \ - CHECK_EQ(status, E_SUCCESS) << "MKL DNN call failed (status: " << status << ")."; \ - } - - -namespace mxnet { -namespace op { - -#if MKL_EXPERIMENTAL == 1 - template - inline DType * mkl_prv_data(const TBlob &b) { - std::shared_ptr bottom_data_mem = b.Mkl_mem_; - bool mem_valid = (bottom_data_mem != nullptr) && bottom_data_mem->head_at_prv(); - if (mem_valid) { - return reinterpret_cast(bottom_data_mem->prv_data()); - } - return NULL; - } - - template - inline int mkl_prv_count(const TBlob &b) { - std::shared_ptr bottom_data_mem = b.Mkl_mem_; - bool mem_valid = (bottom_data_mem != nullptr) && bottom_data_mem->head_at_prv(); - if (mem_valid) { - return bottom_data_mem->prv_count(); - } - return 0; - } -#endif - inline void mkl_set_priv_flag(const TBlob &b) { -#if MKL_EXPERIMENTAL == 1 - std::shared_ptr bottom_data_mem = b.Mkl_mem_; - bool mem_valid = (bottom_data_mem != nullptr) && bottom_data_mem->head_at_prv(); - if (mem_valid) { - bottom_data_mem->disable_prv_2_cpu(true); - } -#endif - } -#if MKL_EXPERIMENTAL == 1 - template - inline std::shared_ptr > mkl_get_mem_desc( - const std::shared_ptr data_mem) { - std::shared_ptr prv_descriptor = - data_mem->get_prv_descriptor(); - CHECK_EQ(prv_descriptor->get_descr_type(), - PrvMemDescr::PRV_DESCR_MKL2017); - std::shared_ptr > mem_descr - = std::static_pointer_cast> - (prv_descriptor); - CHECK(mem_descr != NULL); - return mem_descr; - } -#endif - template - inline mshadow::Tensor mkl_experimental_direct_get( - const TBlob &b, mshadow::Stream *s) { - mkl_set_priv_flag(b); - return b.get(s); - } - template - inline mshadow::Tensor mkl_experimental_direct_get_with_shape( - const TBlob &b, const mshadow::Shape &shape, mshadow::Stream *s) { - mkl_set_priv_flag(b); - return b.get_with_shape(shape, s); - } -} // namespace op -#if MKL_EXPERIMENTAL == 1 -inline void mkl_tblobs_prv_to_cpu(const std::vector &data) { - for (size_t i = 0; i < data.size(); i++) { - std::shared_ptr mem_holder = data[i].Mkl_mem_; - if (mem_holder != nullptr && mem_holder->b_eager_mode) { - mem_holder->check_and_prv_to_cpu(data[i].dptr_); - } - } -} -inline void mkl_set_tblob_eager_mode(const TBlob &data) { - std::shared_ptr mem_holder = data.Mkl_mem_; - if (mem_holder != nullptr) { - mem_holder->set_eager_mode(true); - } -} -#endif -} // namespace mxnet -#endif // MXNET_OPERATOR_MKL_MKL_UTIL_INL_H_