From d637af1339d273dd561910e7e82ccef56cba5e0c Mon Sep 17 00:00:00 2001 From: Chris Olivier Date: Tue, 6 Feb 2018 16:20:33 -0800 Subject: [PATCH] WIP unit tests (#25) * WIP unit tests * some backward items initialized --- CMakeLists.txt | 28 ++- tests/cpp/include/test_core_op.h | 198 +++++++++++++--- tests/cpp/include/test_util.h | 10 + tests/cpp/operator/batchnorm_test.cc | 320 +++++++++++++++++--------- tests/cpp/operator/dropout_perf.cc | 4 +- tests/cpp/operator/fully_conn_perf.cc | 4 +- 6 files changed, 415 insertions(+), 149 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cb8aa87c65c7..9f6d89b1beaf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -138,7 +138,14 @@ if(USE_VTUNE) endif() if(USE_MKL_IF_AVAILABLE) + if(USE_MKLDNN) + add_subdirectory(3rdparty/mkldnn) + include_directories(3rdparty/mkldnn/include) + list(APPEND mxnet_LINKER_LIBS mkldnn) + set(MKL_FOUND TRUE) + else() find_package(MKL) + endif() if(MKL_FOUND) include_directories(${MKL_INCLUDE_DIR}) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src/operator/mkl) @@ -199,9 +206,8 @@ if(NOT MSVC AND NOT APPLE) set(BEGIN_WHOLE_ARCHIVE -Wl,--whole-archive) set(END_WHOLE_ARCHIVE -Wl,--no-whole-archive) elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang") - set(BEGIN_WHOLE_ARCHIVE -Wl,-force_load) # force_load loads all symbols of the next library - #set(BEGIN_WHOLE_ARCHIVE -Wl,-all_load) # loads all symbols from all libraries - #set(END_WHOLE_ARCHIVE -Wl,-noall_load) + # using regular Clang or AppleClang + set(BEGIN_WHOLE_ARCHIVE -Wl,-force_load) endif() if(UNIX) @@ -270,6 +276,9 @@ if(USE_OPENMP) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/openmp) list(REMOVE_ITEM mxnet_LINKER_LIBS iomp5) list(APPEND mxnet_LINKER_LIBS omp) + if(UNIX) + list(APPEND mxnet_LINKER_LIBS pthread) + endif() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") else() @@ -559,7 +568,18 @@ endif() if(USE_OPENCV) add_executable(im2rec "tools/im2rec.cc") - target_link_libraries(im2rec ${BEGIN_WHOLE_ARCHIVE} mxnet ${END_WHOLE_ARCHIVE} ${mxnet_LINKER_LIBS} ${OpenCV_LIBS} dmlc) + if(MSVC) + target_link_libraries(im2rec mxnet) + else() + target_link_libraries(im2rec ${BEGIN_WHOLE_ARCHIVE} mxnet_static ${END_WHOLE_ARCHIVE}) + endif() + target_link_libraries(im2rec + ${mxnet_LINKER_LIBS} + ${OpenCV_LIBS} + dmlc + ${nnvm_LINKER_LIBS} + ${pslite_LINKER_LIBS} + ) endif() target_link_libraries(mxnet PUBLIC dmlc) diff --git a/tests/cpp/include/test_core_op.h b/tests/cpp/include/test_core_op.h index ff5a9dde901c..019b5c932ac8 100644 --- a/tests/cpp/include/test_core_op.h +++ b/tests/cpp/include/test_core_op.h @@ -23,6 +23,7 @@ #include #include #include +#include #include "./test_op.h" #include "../../../src/imperative/imperative_utils.h" @@ -326,6 +327,75 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer #endif } + static nnvm::NodePtr GetBackwardDependency(const nnvm::NodePtr& node, + uint32_t num_inputs, + uint32_t num_outputs + //std::vector *p_save_inputs, + //std::vector *p_save_outputs + ) { + + const Op* op = node->op(); + if(op) { + if(!op->name.empty()) { + if(op->name == "BatchNorm") { + std::cout << "Imperative::GetBackwardDependency( " << op->name << " )" << std::endl; + } + } + } + + static auto& fgradient = nnvm::Op::GetAttr("FGradient"); +// std::vector& save_inputs = *p_save_inputs; +// std::vector& save_outputs = *p_save_outputs; +// save_inputs.resize(num_inputs); +// save_outputs.resize(num_outputs); +// std::fill(save_inputs.begin(), save_inputs.end(), false); +// std::fill(save_outputs.begin(), save_outputs.end(), false); + + node->inputs.clear(); + node->inputs.reserve(num_inputs); + for (uint32_t i = 0; i < num_inputs; ++i) { + node->inputs.emplace_back(nnvm::NodeEntry{nullptr, i, 0}); + } + + if (fgradient.count(node->op())) { + std::vector ograd_entries; + ograd_entries.reserve(num_outputs); + for (uint32_t i = 0; i < num_outputs; ++i) { + ograd_entries.emplace_back(nnvm::NodeEntry{nullptr, i, 1}); + } + const std::vector igrad_entries = fgradient[node->op()](node, ograd_entries); + + if(!igrad_entries.empty()) { + return igrad_entries[0].node; + } + +// for (const auto& i : igrad_entries) { +// if (i.node == nullptr && i.version == 0) { +// save_inputs[i.index] = true; +// } else if (i.node == node) { +// save_outputs[i.index] = true; +// } +// } +// DFSVisit(igrad_entries, [&](const nnvm::NodePtr& gnode) { +// if (!gnode || gnode == node) return; +// for (const auto& i : gnode->inputs) { +// if (i.node == nullptr && i.version == 0) { +// save_inputs[i.index] = true; +// } else if (i.node == node) { +// save_outputs[i.index] = true; +// } +// } +// }); + } + return nullptr; + } + + nnvm::NodePtr CalcBackwardPass() const { + nnvm::NodePtr node = nnvm::Node::Create(); + node->attrs = attrs_; + return GetBackwardDependency(node, inputs().size(), outputs().size()); + } + /*! * \brief Initialize the execution objects and execution data (only occurs once) * \param args Parameter arguments @@ -334,7 +404,8 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer void Init(const kwargs_t& in_args, const std::vector& inputs = {}, const std::vector& outputs = {}, - const CoreOpExecutor *backward_for_op = nullptr + const CoreOpExecutor *backward_for_op = nullptr, + nnvm::NodePtr bwd_node_ptr = nullptr ) { if (!initialized_) { initialized_ = true; @@ -353,33 +424,73 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer op_ = nnvm::Op::Get(op_name); CHECK_NOTNULL(op_); + nnvm::NodePtr bwd_node_ptr; + if(backward_for_op) { + bwd_node_ptr = backward_for_op->CalcBackwardPass(); + } + // Set up forward attrs_ = ParseAttrs(op_, args); int num_inputs = op_->num_inputs; - if (op_->get_num_inputs) + if (op_->get_num_inputs) { num_inputs = op_->get_num_inputs(attrs_); + } else if(backward_for_op) { + CHECK_NOTNULL(bwd_node_ptr.get()); + num_inputs = static_cast(bwd_node_ptr->inputs.size()); + } + +// if(backward_for_op) { +// const int num_fwd_outputs = backward_for_op->outputs().size(); +// num_inputs = std::max(num_fwd_outputs, num_inputs); +// } if (!inputs.empty()) { CHECK_EQ(inputs.size(), static_cast(num_inputs)); } - int inferred_num_outputs, num_visible_outputs; + int inferred_num_outputs /*, num_visible_outputs*/; - imperative::SetNumOutputs(op_, attrs_, num_inputs, &inferred_num_outputs, - &num_visible_outputs); +// imperative::SetNumOutputs(op_, attrs_, num_inputs, &inferred_num_outputs, +// &num_visible_outputs); + + if (op_->get_num_outputs) { + inferred_num_outputs = op_->get_num_outputs(attrs_); + } else { + inferred_num_outputs = op_->num_outputs; + } +// static auto& finput_names = Op::GetAttr("FListInputNames"); +// if(finput_names.count(op_)) { +// std::vector i_names = finput_names[op_](attrs_); +// const int i_name_count = i_names.size(); +// num_inputs = std::max(i_name_count, num_inputs); +// } + //using FListInputNames = std::function (const NodeAttrs& attrs)>; + +// static auto& grad_fun_map = Op::GetAttr("FGradient"); +// if(grad_fun_map.count(op_)) { +// auto grad_fun = grad_fun_map[op_]; +// nnvm::NodePtr nodeptr = std::make_shared(); +// nodeptr->attrs = attrs_; +// std::vector out_grads; +// std::vector entries = grad_fun(nodeptr, out_grads); +// const int grad_count = entries.size(); +// num_inputs = std::max(grad_count, num_inputs); +// } + + //CHECK_GE(inferred_num_outputs, num_visible_outputs); // Generic, all shapes the same. Probably this will need to be adjusted for more complex // operators such as dot - std::vector shapes; - for (size_t i = 0, n = std::max(num_visible_outputs, num_inputs); i < n; ++i) { - shapes.emplace_back(i < input_shapes_.size() ? input_shapes_[i] - : input_shapes_[input_shapes_.size() - 1]); + std::vector input_shapes; + for (size_t i = 0, n = num_inputs; i < n; ++i) { + input_shapes.emplace_back(i < input_shapes_.size() ? input_shapes_[i] + : input_shapes_[input_shapes_.size() - 1]); } std::vector inputs_p, outputs_p; if (!outputs.empty()) { - CHECK_EQ(outputs.size(), static_cast(num_visible_outputs)); + CHECK_EQ(outputs.size(), static_cast(inferred_num_outputs)); } inputs_.reserve(num_inputs); @@ -388,20 +499,36 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer outputs_p.reserve(inferred_num_outputs); for (size_t i = 0; i < static_cast(num_inputs); ++i) { - CHECK_LT(i, static_cast(shapes.size())); - inputs_.emplace_back(i < inputs.size() ? inputs[i] : CreateRandArray(shapes[i], + CHECK_LT(i, static_cast(input_shapes.size())); + inputs_.emplace_back(i < inputs.size() ? inputs[i] : CreateRandArray(input_shapes[i], ctx_.run_ctx.ctx)); inputs_p.emplace_back(&*inputs_.rbegin()); } - for (size_t i = 0; i < static_cast(inferred_num_outputs); ++i) { - // If supplied and valid, pass from the supplied outputs vector - // Otherwise use empty for forward pass, or zero-filled for backward pass - outputs_.emplace_back(i < outputs.size() - ? outputs[i] - : (backward_for_op ? CreateZeroArray(shapes[i], ctx_.run_ctx.ctx) - : NDArray())); - outputs_p.emplace_back(&*outputs_.rbegin()); + // Output arrays + if(outputs_.empty()) { + std::vector output_shapes; + static auto& finfer_shape = Op::GetAttr("FInferShape"); + if (finfer_shape.count(op_)) { + nnvm::FInferShape call_infer_shapes = finfer_shape[op_]; + output_shapes.resize(inferred_num_outputs); + call_infer_shapes(attrs_, &input_shapes, &output_shapes); + } else { + // TODO: this should be only if outputs param is empty + output_shapes = input_shapes; + output_shapes.resize(inferred_num_outputs); + } + CHECK_EQ(output_shapes.size(), inferred_num_outputs); + for (size_t i = 0; i < static_cast(inferred_num_outputs); ++i) { + // If supplied and valid, pass from the supplied outputs vector + // Otherwise use empty for forward pass, or zero-filled for backward pass + outputs_.emplace_back(i < outputs.size() ? outputs[i] + : (backward_for_op + ? CreateZeroArray(output_shapes[i], + ctx_.run_ctx.ctx) + : NDArray())); + outputs_p.emplace_back(&*outputs_.rbegin()); + } } if (!backward_for_op) { @@ -409,14 +536,14 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer imperative::SetShapeType(ctx_.run_ctx.ctx, attrs_, inputs_p, outputs_p, &dispatch_mode); } else { // Backward op, so set based upon inputs - CHECK_EQ(static_cast(num_visible_outputs), backward_for_op->inputs().size()); - for (int i = 0; i < num_visible_outputs; ++i) { - CHECK_LT(static_cast(i), shapes.size()); - // backward outputs should look like forward inputs - // TODO(cjolivier01): This check fails for dot product... - // Need better inference of backward shapes - // CHECK_EQ(backward_for_op->inputs()[i].shape(), outputs_[i].shape()); - } + //CHECK_EQ(static_cast(num_visible_outputs), backward_for_op->inputs().size()); +// for (int i = 0; i < num_visible_outputs; ++i) { +// CHECK_LT(static_cast(i), input_shapes.size()); +// // backward outputs should look like forward inputs +// // TODO(cjolivier01): This check fails for dot product... +// // Need better inference of backward shapes +// // CHECK_EQ(backward_for_op->inputs()[i].shape(), outputs_[i].shape()); +// } } std::vector req; @@ -450,6 +577,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer if (!no_backward) { CHECK_GE(bwd.size(), 1U) << "Can't automatically determine backward op name. Please specify"; + for (std::pair, std::string> &bw_item : bwd) { bw_item.first->set_verbose(verbose_); backward_.emplace_back(bw_item.first); @@ -575,6 +703,11 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer return backward_[0]->inputs(); } + const std::vector& bwd_inputs() const { + CHECK_EQ(backward_.size(), 1U); + return backward_[0]->inputs(); + } + /*! * \brief Backward outputs (i.e. input grad) * \return reference to NDArray vector of backward outputs @@ -584,6 +717,11 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer return backward_[0]->outputs(); } + const std::vector& bwd_outputs() const { + CHECK_EQ(backward_.size(), 1U); + return backward_[0]->outputs(); + } + void set_verbose(bool verbose) { verbose_ = verbose; } @@ -658,7 +796,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer class CoreOpProp { public: - void Init(const kwargs_t& kwargs) { kwargs_ = kwargs; } + virtual void Init(const kwargs_t& kwargs) { kwargs_ = kwargs; } const kwargs_t& GetArgs() const { return kwargs_; } private: kwargs_t kwargs_; @@ -687,7 +825,7 @@ inline void BasicRunCoreOpBidirectional(const bool isGPU, const char *op_name, const char *backward_op_name = "") { test::op::CoreOpExecutor op(isGPU, shapes); - op.set_verbose(false); + op.set_verbose(verbose); op.Init(op.ArgsWithOpName(op_kwargs, op_name, backward_op_name)); diff --git a/tests/cpp/include/test_util.h b/tests/cpp/include/test_util.h index a41e62392c28..e33b9a6e68c7 100644 --- a/tests/cpp/include/test_util.h +++ b/tests/cpp/include/test_util.h @@ -160,6 +160,16 @@ inline void fill(const TBlob& blob, const DType val) { } } +template +inline void try_fill(const TBlob *blob, const DType val) { + if(blob) { + DType *p1 = blob->dptr(); + for (size_t i = 0, n = blob->Size(); i < n; ++i) { + *p1++ = val; + } + } +} + template inline void fill(const TBlob& blob, const DType *valArray) { DType *p1 = blob.dptr(); diff --git a/tests/cpp/operator/batchnorm_test.cc b/tests/cpp/operator/batchnorm_test.cc index b670702936fa..aaa1add21b5f 100644 --- a/tests/cpp/operator/batchnorm_test.cc +++ b/tests/cpp/operator/batchnorm_test.cc @@ -32,11 +32,9 @@ #include "./test_core_op.h" #include "executor/exec_pass.h" -#if 0 - using namespace mxnet; -#define SIMPLE_DIMENSIONS 0 +#define SIMPLE_DIMENSIONS 1 #define MXNET_DUMP_C 0 #define DISABLE_VALIDATION 0 // If performance profiling, may do things // that cause validation to fail @@ -64,65 +62,141 @@ static constexpr int TIMING_DW = 28; /*! \brief BatchNorm-specific test data */ template class BNOperatorExecutor : public test::op::CoreOpExecutor { + using Super = typename test::op::CoreOpExecutor; public: BNOperatorExecutor(const bool isGPU, const TShape& inputShape, + const test::op::kwargs_t& kwargs, const bool hasWeightAndBias = false) : test::op::CoreOpExecutor(isGPU, { inputShape }) , hasWeightAndBias_(hasWeightAndBias) { + param_.Init(kwargs); } - enum ForwardInputs { kForData, kForGamma, kForBeta, kForMovingMean, kForMovingVar }; - enum ForwardOutputs { kForOut, kForMean, kForVar }; + //using BlobVectorType = typename test::op::CoreOpExecutor::BlobVectorType; + + enum ForwardInputs { kForInData, kForGamma, kForBeta, kForMovingMean, kForMovingVar }; + enum ForwardOutputs { kForOutData, kForOutMean, kForOutVar }; + enum BackwardInputs { kBackOutGrad, kBackOutGradMean, kBackOutGradVar, kBackData, kBackGamma, kBackBeta, kBackInMovingMean, kBackInMovingVar, kBackOutData, kBackOutMean, kBackOutVar }; - const NDArray *GetForwardInArray(const std::vector &arrs, ForwardInputs idx) { + enum WhichArray { + kForwardIn, + kForwardOut, + kBackwardIn, + kBackwardOut + }; + + const NDArray *GetForwardInArray(const int idx) const { + const std::vector &arrs = Super::inputs(); + CHECK_LT(idx, arrs.size()); return &arrs[idx]; } - const NDArray *GetForwardOutArray(const std::vector &arrs, ForwardOutputs idx) { + const NDArray *GetForwardOutArray(const int idx) const { + const std::vector &arrs = Super::outputs(); + CHECK_LT(idx, arrs.size()); return &arrs[idx]; } - const NDArray *GetBackwardOutArray(const std::vector &arrs, ForwardInputs idx) { + const NDArray *GetBackwardOutArray(const int idx) const { + const std::vector &arrs = Super::bwd_outputs(); + CHECK_LT(idx, arrs.size()); return &arrs[idx]; } - const NDArray *GetBackwardInArray(const std::vector &arrs, - const BatchNormParam& param, BackwardInputs idx) { + const NDArray *GetBackwardInArray(const int idx) const { + const std::vector &arrs = Super::bwd_inputs(); switch (idx) { - case kOutGrad: - return &arrs[kOutGrad]; - case kOutGradMean: - if (param.output_mean_var) - return &arrs[kOutGradMean]; - else + case kBackOutGrad: + CHECK_LT(kBackOutGrad, arrs.size()); + return &arrs[kBackOutGrad]; + case kBackOutGradMean: + if (param_.output_mean_var) { + CHECK_LT(kBackOutGradMean, arrs.size()); + return &arrs[kBackOutGradMean]; + } else { + CHECK(false); return nullptr; - case kOutGradVar: - if (param.output_mean_var) - return &arrs[kOutGradVar]; - else + } + case kBackOutGradVar: + if (param_.output_mean_var) { + return &arrs[kBackOutGradVar]; + } else { + CHECK(false); return nullptr; + } + default: { + const size_t index = param_.output_mean_var ? idx : idx - 2; + if(index < arrs.size()) { + return &arrs[index]; + } + return nullptr; + } + } + } + + const TBlob *GetBackwardInBlob(const int idx) const { + const NDArray * arr = GetBackwardInArray(idx); + if(arr) { + return &arr->data(); + } + return nullptr; + } + + const NDArray *GetArray(const WhichArray wa, const int idx) const { + switch(wa) { + case kForwardIn: + return GetForwardInArray(idx); + case kForwardOut: + return GetForwardOutArray(idx); + case kBackwardIn: + return GetBackwardOutArray(idx); + case kBackwardOut: default: - return &arrs[param.output_mean_var ? idx : idx - 2]; + CHECK(false); // need to check params + return nullptr; } } + inline const TBlob& Blob(const NDArray *arr) const { return arr->data(); } + + template + const TBlob& GetBlob(const WhichArray wa, const EnumType idx) const { + return GetArray(wa, idx)->data(); + } + void resetForward() override { + // Start by filling all inputs and outputs with an arbitrary value + for (size_t i = 0, n = Super::inputs().size(); i < n; ++i) { + const TBlob& out = Blob(&Super::inputs()[i]); + const int dtype = out.type_flag_; + MSHADOW_TYPE_SWITCH(dtype, DTypeX, { test::fill(out, DTypeX(0.1234)); }); + } + for (size_t i = 0, n = Super::outputs().size(); i < n; ++i) { + const TBlob& out = Blob(&Super::outputs()[i]); + const int dtype = out.type_flag_; + MSHADOW_TYPE_SWITCH(dtype, DTypeX, { test::fill(out, DTypeX(0.1234)); }); + } // Init input data MSHADOW_TYPE_SWITCH( - this->input_blobs()[mxnet::op::batchnorm::kData].type_flag_, + Blob(GetForwardInArray(kForInData)).type_flag_, + //this->c_.blob_input_vec_[mxnet::op::batchnorm::kData].type_flag_, DTypeX, { DTypeX val = 0; - test::patternFill(&this->input_blobs()[mxnet::op::batchnorm::kData], - [&val]{ return val += 1; }); }); + test::patternFill( + &Blob(GetForwardInArray(kForInData)), + //&this->c_.blob_input_vec_[mxnet::op::batchnorm::kData], + [&val]{ return val += 1; }); }); MSHADOW_TYPE_SWITCH( - this->input_blobs()[mxnet::op::batchnorm::kGamma].type_flag_, + Blob(GetForwardInArray(kForGamma)).type_flag_, + //this->c_.blob_input_vec_[mxnet::op::batchnorm::kGamma].type_flag_, DTypeX, { - const TBlob& blob = this->input_blobs()[mxnet::op::batchnorm::kGamma]; + //const TBlob& blob = this->c_.blob_input_vec_[mxnet::op::batchnorm::kGamma]; + const TBlob& blob = Blob(GetForwardInArray(kForGamma)); test::fill(blob, DTypeX(1)); if (hasWeightAndBias_) { if (blob.size(0) > 1) { @@ -131,9 +205,11 @@ class BNOperatorExecutor : public test::op::CoreOpExecutor { } }); MSHADOW_TYPE_SWITCH( - this->input_blobs()[mxnet::op::batchnorm::kBeta].type_flag_, + Blob(GetForwardInArray(kForBeta)).type_flag_, + //this->c_.blob_input_vec_[mxnet::op::batchnorm::kBeta].type_flag_, DTypeX, { - const TBlob& blob = this->input_blobs()[mxnet::op::batchnorm::kBeta]; + //const TBlob& blob = this->c_.blob_input_vec_[mxnet::op::batchnorm::kBeta]; + const TBlob& blob = Blob(GetForwardInArray(kForBeta)); if (!hasWeightAndBias_) { test::fill(blob, DTypeX(0)); } else { // This will cause forward pass check to fail when calculating sum == 0 @@ -146,71 +222,90 @@ class BNOperatorExecutor : public test::op::CoreOpExecutor { // Init the moving data (all mean = 0, all var = 1) MSHADOW_TYPE_SWITCH( - this->input_blobs()[mxnet::op::batchnorm::kMovingMean].type_flag_, + //this->c_.blob_aux_states_[mxnet::op::batchnorm::kMovingMean].type_flag_, + Blob(GetForwardInArray(kForMovingMean)).type_flag_, DTypeX, { - test::fill(this->input_blobs()[mxnet::op::batchnorm::kMovingMean], DTypeX(0)); + test::fill(Blob(GetForwardInArray(kForMovingMean)), DTypeX(0)); + //test::fill(this->c_.blob_aux_states_[mxnet::op::batchnorm::kMovingMean], DTypeX(0)); }); MSHADOW_TYPE_SWITCH( - this->input_blobs()[mxnet::op::batchnorm::kMovingVar].type_flag_, + Blob(GetForwardInArray(kForMovingVar)).type_flag_, + //this->c_.blob_aux_states_[mxnet::op::batchnorm::kMovingVar].type_flag_, DTypeX, { - test::fill(this->input_blobs()[mxnet::op::batchnorm::kMovingVar], DTypeX(1));}); - - for (size_t i = 0, n = this->output_blobs().size(); i < n; ++i) { - const int dtype = this->output_blobs()[i].type_flag_; - MSHADOW_TYPE_SWITCH(dtype, DTypeX, - { test::fill(this->output_blobs()[i], DTypeX(0.1234)); }); - } + //test::fill(this->c_.blob_aux_states_[mxnet::op::batchnorm::kMovingVar], DTypeX(1));}); + test::fill(Blob(GetForwardInArray(kForMovingVar)), DTypeX(1)); + }); } void resetBackward() override { + // Start by filling all backward inputs and outputs with an arbitrary value + for (size_t i = 0, n = Super::bwd_inputs().size(); i < n; ++i) { + const TBlob& out = Blob(&Super::bwd_inputs()[i]); + const int dtype = out.type_flag_; + MSHADOW_TYPE_SWITCH(dtype, DTypeX, { test::fill(out, DTypeX(0.5678)); }); + } + for (size_t i = 0, n = Super::bwd_outputs().size(); i < n; ++i) { + const TBlob& out = Blob(&Super::bwd_outputs()[i]); + const int dtype = out.type_flag_; + MSHADOW_TYPE_SWITCH(dtype, DTypeX, { test::fill(out, DTypeX(0.5678)); }); + } DType val = -.001; MSHADOW_TYPE_SWITCH( - this->output_blobs()[mxnet::op::batchnorm::kOut].type_flag_, + GetBlob(kBackwardIn, kBackOutGrad).type_flag_, + //this->c_.blob_out_grad_[mxnet::op::batchnorm::kOut].type_flag_, DTypeX, { - test::patternFill(&this->output_blobs()[mxnet::op::batchnorm::kOut], + test::patternFill( + &GetBlob(kBackwardIn, kBackOutGrad), + //&this->c_.blob_out_grad_[mxnet::op::batchnorm::kOut], [&val]{ return val += 1; }); }); // out-grad weights - if (mxnet::op::batchnorm::kGamma < this->output_blobs().size()) { + //if (mxnet::op::batchnorm::kGamma < this->c_.blob_out_grad_.size()) { + if (GetBackwardInBlob(kBackGamma)) { MSHADOW_TYPE_SWITCH( - this->output_blobs()[mxnet::op::batchnorm::kGamma].type_flag_, + GetBackwardInBlob(kBackGamma)->type_flag_, + //this->c_.blob_out_grad_[mxnet::op::batchnorm::kGamma].type_flag_, DTypeX, - { test::try_fill(this->output_blobs(), mxnet::op::batchnorm::kGamma, DTypeX(0.1)); }); + { test::try_fill(GetBackwardInBlob(kBackGamma), DTypeX(0.1)); }); } // out-grad biases - if (mxnet::op::batchnorm::kBeta < this->output_blobs().size()) { + if (GetBackwardInBlob(kBackBeta)) { MSHADOW_TYPE_SWITCH( - this->output_blobs()[mxnet::op::batchnorm::kBeta].type_flag_, + GetBackwardInBlob(kBackBeta)->type_flag_, + //this->c_.blob_out_grad_[mxnet::op::batchnorm::kGamma].type_flag_, DTypeX, - { test::try_fill(this->output_blobs(), mxnet::op::batchnorm::kBeta, DTypeX(0.1)); }); + { test::try_fill(GetBackwardInBlob(kBackBeta), DTypeX(0.1)); }); } + /* // in-grad MSHADOW_TYPE_SWITCH( - this->input_blobs()[mxnet::op::batchnorm::kData].type_flag_, + this->c_.blob_in_grad_[mxnet::op::batchnorm::kData].type_flag_, DTypeX, - { test::try_fill(this->input_blobs(), mxnet::op::batchnorm::kData, DTypeX(0)); }); + { test::try_fill(this->c_.blob_in_grad_, mxnet::op::batchnorm::kData, DTypeX(0)); }); // in-grad weights - if (mxnet::op::batchnorm::kGamma < this->input_blobs().size()) { + if (mxnet::op::batchnorm::kGamma < this->c_.blob_in_grad_.size()) { MSHADOW_TYPE_SWITCH( - this->input_blobs()[mxnet::op::batchnorm::kGamma].type_flag_, + this->c_.blob_in_grad_[mxnet::op::batchnorm::kGamma].type_flag_, DTypeX, - { test::try_fill(this->input_blobs(), mxnet::op::batchnorm::kGamma, DTypeX(0)); }); + { test::try_fill(this->c_.blob_in_grad_, mxnet::op::batchnorm::kGamma, DTypeX(0)); }); } // in-grad biases - if (mxnet::op::batchnorm::kBeta < this->input_blobs().size()) { + if (mxnet::op::batchnorm::kBeta < this->c_.blob_in_grad_.size()) { MSHADOW_TYPE_SWITCH( - this->input_blobs()[mxnet::op::batchnorm::kBeta].type_flag_, + this->c_.blob_in_grad_[mxnet::op::batchnorm::kBeta].type_flag_, DTypeX, - { test::try_fill(this->input_blobs(), mxnet::op::batchnorm::kBeta, DTypeX(0)); }); + { test::try_fill(this->c_.blob_in_grad_, mxnet::op::batchnorm::kBeta, DTypeX(0)); }); } + */ } const bool hasWeightAndBias_; // This will cause forward pass validation to fail + op::BatchNormParam param_; }; /*! \brief Validate batch norm test outputs */ @@ -375,29 +470,14 @@ class BatchNormValidator : public test::op::Validator { } public: - template + template static inline bool compare(const ExecutorType1& i1, const ExecutorType2& i2, - const typename - test::op::CoreOpExecutor::BlobVectorType bvt, - const size_t idx, + const typename ExecutorType1::WhichArray wa, + const EnumType idx, bool print = false) { - // Validate legacy data - auto *legacy1 = dynamic_cast *>(&i1); - auto *legacy2 = dynamic_cast *>(&i2); - CHECK_NOTNULL(legacy1); - CHECK_NOTNULL(legacy2); - const std::vector &bv1 = legacy1->getBlobVect(bvt); - const std::vector &bv2 = legacy2->getBlobVect(bvt); - - // If this is an invalid index, at least make sure the two blob vects - // are similarly too small for the index - if (bv1.size() <= idx) { - CHECK(bv1.size() == bv2.size()); - return true; - } - const TBlob &b1 = bv1[idx]; - const TBlob &b2 = bv2[idx]; + const TBlob& b1 = i1.GetBlob(wa, idx); + const TBlob& b2 = i2.GetBlob(wa, idx); if (print && test::debug_output) { test::print(RunContext(), &(std::cout << "Blob 1:"), b1, true, true); test::print(RunContext(), &(std::cout << "Blob 2:"), b2, true, true); @@ -408,7 +488,10 @@ class BatchNormValidator : public test::op::Validator { /*! \brief Check batch norm output */ template static void validateForward(const BNOperatorProp& data) { - const TBlob& outputBlob = data.output_blobs()[mxnet::op::batchnorm::kData]; + //const TBlob& outputBlob = data.output_blobs()[mxnet::op::batchnorm::kData]; + const TBlob& outputBlob = data.GetBlob(BNOperatorProp::kForwardOut, + BNOperatorProp::kForOutData); + test::print(RunContext(), &(std::cout << "Fwd Output Blob:"), outputBlob, true, true); switch (outputBlob.ndim()) { case 3: checkBatchNorm1D(&outputBlob); @@ -429,24 +512,25 @@ class BatchNormValidator : public test::op::Validator { template static void compare( const test::op::OpInfo>& info_1, - const test::op::OpInfo>& info_2) { + const test::op::OpInfo>& info_2) { // Input EXPECT_TRUE(compare(*info_1.executor_, *info_2.executor_, - test::op::CoreOpExecutor::kInput, - mxnet::op::batchnorm::kData)); + BNOperatorExecutor::kForwardIn, + BNOperatorExecutor::kForInData)); EXPECT_TRUE(compare(*info_1.executor_, *info_2.executor_, - test::op::CoreOpExecutor::kInput, - mxnet::op::batchnorm::kGamma)); + BNOperatorExecutor::kForwardIn, + BNOperatorExecutor::kForGamma)); EXPECT_TRUE(compare(*info_1.executor_, *info_2.executor_, - test::op::CoreOpExecutor::kInput, - mxnet::op::batchnorm::kBeta)); + BNOperatorExecutor::kForwardIn, + BNOperatorExecutor::kForBeta)); // Output EXPECT_TRUE(compare(*info_1.executor_, *info_2.executor_, - test::op::CoreOpExecutor::kOutput, - mxnet::op::batchnorm::kOut)); + BNOperatorExecutor::kForwardOut, + BNOperatorExecutor::kForOutData)); CHECK_EQ(info_2.prop_->getParam().use_global_stats, info_1.prop_->getParam().use_global_stats); +#if 0 #if MXNET_USE_CUDNN != 1 /* CUDNN takes a different approach here on first pass */ // Aux EXPECT_TRUE(compare(*info_1.executor_, *info_2.executor_, @@ -475,6 +559,7 @@ class BatchNormValidator : public test::op::Validator { test::op::CoreOpExecutor::kOutGrad, mxnet::op::batchnorm::kData)); } +#endif } }; @@ -528,17 +613,17 @@ static StreamType& dumpF(StreamType *os, *os << "= " << x << std::endl; *os << "=============================" << std::endl; } - typedef typename OperatorExecutor::BlobVectorType BlobVectorType; - PRT(os, *prop.executor_, BlobVectorType::kInput, mxnet::op::batchnorm::kData); - PRT(os, *prop.executor_, BlobVectorType::kInput, mxnet::op::batchnorm::kGamma); - PRT(os, *prop.executor_, BlobVectorType::kInput, mxnet::op::batchnorm::kBeta); - - PRT(os, *prop.executor_, BlobVectorType::kAux, mxnet::op::batchnorm::kMovingMean); - PRT(os, *prop.executor_, BlobVectorType::kAux, mxnet::op::batchnorm::kMovingVar); - - PRT(os, *prop.executor_, BlobVectorType::kOutput, mxnet::op::batchnorm::kOut); - PRT(os, *prop.executor_, BlobVectorType::kOutput, mxnet::op::batchnorm::kMean); - PRT(os, *prop.executor_, BlobVectorType::kOutput, mxnet::op::batchnorm::kVar); +// typedef typename OperatorExecutor::BlobVectorType BlobVectorType; +// PRT(os, *prop.executor_, BlobVectorType::kInput, mxnet::op::batchnorm::kData); +// PRT(os, *prop.executor_, BlobVectorType::kInput, mxnet::op::batchnorm::kGamma); +// PRT(os, *prop.executor_, BlobVectorType::kInput, mxnet::op::batchnorm::kBeta); +// +// PRT(os, *prop.executor_, BlobVectorType::kAux, mxnet::op::batchnorm::kMovingMean); +// PRT(os, *prop.executor_, BlobVectorType::kAux, mxnet::op::batchnorm::kMovingVar); +// +// PRT(os, *prop.executor_, BlobVectorType::kOutput, mxnet::op::batchnorm::kOut); +// PRT(os, *prop.executor_, BlobVectorType::kOutput, mxnet::op::batchnorm::kMean); +// PRT(os, *prop.executor_, BlobVectorType::kOutput, mxnet::op::batchnorm::kVar); } return *os; } @@ -555,15 +640,15 @@ static StreamType& dumpB(StreamType *os, *os << "=============================" << std::endl; } - typedef typename OperatorExecutor::BlobVectorType BlobVectorType; - PRT(os, *prop.executor_, BlobVectorType::kInGrad, mxnet::op::batchnorm::kData); - PRT(os, *prop.executor_, BlobVectorType::kInGrad, mxnet::op::batchnorm::kGamma); - PRT(os, *prop.executor_, BlobVectorType::kInGrad, mxnet::op::batchnorm::kBeta); - - PRT(os, *prop.executor_, BlobVectorType::kAux, mxnet::op::batchnorm::kMovingMean); - PRT(os, *prop.executor_, BlobVectorType::kAux, mxnet::op::batchnorm::kMovingVar); - - PRT(os, *prop.executor_, BlobVectorType::kOutGrad, mxnet::op::batchnorm::kOut); +// typedef typename OperatorExecutor::BlobVectorType BlobVectorType; +// PRT(os, *prop.executor_, BlobVectorType::kInGrad, mxnet::op::batchnorm::kData); +// PRT(os, *prop.executor_, BlobVectorType::kInGrad, mxnet::op::batchnorm::kGamma); +// PRT(os, *prop.executor_, BlobVectorType::kInGrad, mxnet::op::batchnorm::kBeta); +// +// PRT(os, *prop.executor_, BlobVectorType::kAux, mxnet::op::batchnorm::kMovingMean); +// PRT(os, *prop.executor_, BlobVectorType::kAux, mxnet::op::batchnorm::kMovingVar); +// +// PRT(os, *prop.executor_, BlobVectorType::kOutGrad, mxnet::op::batchnorm::kOut); } return *os; } @@ -596,7 +681,9 @@ static test::op::OpInfo TestBatchNormOperatorFor #endif test::op::OpInfo info = test::op::createOpAndInfoF< - OperatorProp, OperatorExecutor>(kwargs, isGPU, inputShape); + OperatorProp, OperatorExecutor>( + OperatorExecutor::ArgsWithOpName(kwargs, "BatchNorm", "_backward_BatchNorm"), + isGPU, inputShape, kwargs); info.executor_->initForward(*info.prop_, &info.in_type_); @@ -665,8 +752,10 @@ static test::op::OpInfoPair test BatchNormValidator::compare( *info_1.executor_, *info_2.executor_, - test::op::CoreOpExecutor::kInput, - mxnet::op::batchnorm::kData, false); + OperatorExecutor::kForwardIn, OperatorExecutor::kForInData, + //test::op::CoreOpExecutor::kInput, + //mxnet::op::batchnorm::kData, + false); if (!thisCount) { // return backward @@ -713,18 +802,22 @@ testForwardAndBackward(const bool isGPU, // NOTE: This should know which version to use (V1, mkl, etc) struct BatchNormCoreOpProp : public mxnet::test::op::CoreOpProp { - const mxnet::op::BatchNormParam& getParam() const { - CHECK(false); // Not implemented - static mxnet::op::BatchNormParam dummy; - return dummy; + + void Init(const mxnet::test::op::kwargs_t& kwargs) override { + mxnet::test::op::CoreOpProp::Init(kwargs); + params_.Init(kwargs, dmlc::parameter::kAllowUnknown); } + + const mxnet::op::BatchNormParam& getParam() const { return params_; } + + mxnet::op::BatchNormParam params_; }; template static test::op::OpInfoPair testBNForwardAndBackward2D(const bool isGPU, const TShape &inputShape, - const test::op::kwargs_t kwargs, + const test::op::kwargs_t& kwargs, const bool dumpC = false) { CHECK_EQ(inputShape.ndim(), 4); // V1 can only handle 2D return testForwardAndBackward v2_types = {mshadow::kFloat32, mshadow::kFloat64, mshadow::kFloat16}; @@ -1590,3 +1685,4 @@ TEST(BATCH_NORM, Test2DBackwardMixedComplex_gpu_cpu_ugs) { #endif // MXNET_USE_CUDA #endif + diff --git a/tests/cpp/operator/dropout_perf.cc b/tests/cpp/operator/dropout_perf.cc index c28b9bd48097..4132fcb22c62 100644 --- a/tests/cpp/operator/dropout_perf.cc +++ b/tests/cpp/operator/dropout_perf.cc @@ -44,7 +44,9 @@ TEST(DROPOUT_PERF, ExecuteBidirectional) { test::op::CoreOperatorRunner runner; kwargs = test::op::CoreOpExecutor::ArgsWithOpName(kwargs, "Dropout", "_backward_Dropout"); - runner.RunGenericOperatorForward(false, { shape }, kwargs, 1); + runner.set_verbose(true); + //runner.RunGenericOperatorForward(false, { shape }, kwargs, 1); + runner.RunBidirectional(false, { shape }, kwargs, 1); } /*! diff --git a/tests/cpp/operator/fully_conn_perf.cc b/tests/cpp/operator/fully_conn_perf.cc index 42ded6a8e9b6..2283562dea2b 100644 --- a/tests/cpp/operator/fully_conn_perf.cc +++ b/tests/cpp/operator/fully_conn_perf.cc @@ -77,7 +77,7 @@ TEST(FULLY_CONNECTED, FullyConnectedTimingCPU) { }; } for (const TShape& shape : shapes) { - TShape shape2({250, shape.ProdShape(1, shape.ndim())}); + TShape shape2({250, static_cast(shape.ProdShape(1, shape.ndim()))}); kwargs = test::op::CoreOpExecutor::ArgsWithOpName(kwargs, "FullyConnected", "_backward_FullyConnected"); runner.TimingTest("Fully connected CPU", false, false, kwargs, 2, 10, @@ -113,7 +113,7 @@ TEST(FULLY_CONNECTED, FullyConnectedTimingGPU) { }; } for (const TShape& shape : shapes) { - TShape shape2({250, shape.ProdShape(1, shape.ndim())}); + TShape shape2({250, static_cast(shape.ProdShape(1, shape.ndim()))}); kwargs = test::op::CoreOpExecutor::ArgsWithOpName(kwargs, "FullyConnected", "_backward_FullyConnected"); runner.TimingTest("Fully connected GPU", true, false, kwargs, 2, 10,