Refreshed branch quick_pr (#8643)

apache · Nov 17, 2017 · 3107326 · 3107326
1 parent 4d05bff
commit 3107326
Show file tree

Hide file tree

Showing 6 changed files with 84 additions and 45 deletions.
diff --git a/src/operator/tensor/elemwise_binary_broadcast_op.h b/src/operator/tensor/elemwise_binary_broadcast_op.h
@@ -249,8 +249,9 @@ void BinaryBroadcastBackwardUseIn(const nnvm::NodeAttrs& attrs,
                                   const std::vector<OpReqType>& req,
                                   const std::vector<TBlob>& outputs) {
   TShape new_lshape, new_rshape, new_oshape;
-  bool need_bc = BinaryBroadcastShapeCompact(outputs[0].shape_, outputs[1].shape_, inputs[0].shape_,
-                                             &new_lshape, &new_rshape, &new_oshape);
+  const bool need_bc = BinaryBroadcastShapeCompact(outputs[0].shape_,
+                                                   outputs[1].shape_, inputs[0].shape_,
+                                                   &new_lshape, &new_rshape, &new_oshape) != 0;
   if (!need_bc) {
     ElemwiseBinaryOp::BackwardUseIn<xpu, LOP, ROP>(attrs, ctx, inputs, req, outputs);
   } else {

diff --git a/tests/cpp/include/test_core_op.h b/tests/cpp/include/test_core_op.h
@@ -611,6 +611,50 @@ class CoreOpProp {
 template<typename DType>
 using CoreOperatorRunner = test::OperatorRunner<CoreOpProp, CoreOpExecutor<DType>>;
 
+
+/*!
+ * \brief Run a core op forward and, if a backward op is available, backward
+ * \tparam DType Data type
+ * \param isGPU true if operation is to be run on the GPU
+ * \param verbose true to print the arrays (via PRINT_NDARRAYS) before and after each pass.
+ *        This only controls the printing done here; the executor's own verbose flag is
+ *        always set to false.
+ * \param op_kwargs Operator parameters
+ * \param shapes Shapes handed to the CoreOpExecutor constructor
+ *        (presumably the operator's input shapes -- confirm against CoreOpExecutor)
+ * \param op_name Operator name as registered with nnvm
+ * \param backward_op_name Backwards operator name as registered with nnvm
+ *        If blank, the runner will attempt to determine the backwards operator. If it fails,
+ *        an exception will be thrown.
+ *        If the string is [none], then no backward operator will be created or executed
+ */
+template<typename DType = float>
+inline void BasicRunCoreOpBidirectional(const bool isGPU,
+                                        bool verbose,
+                                        const kwargs_t& op_kwargs,
+                                        const std::vector<TShape>& shapes,
+                                        const char *op_name,
+                                        const char *backward_op_name = "") {
+  test::op::CoreOpExecutor<DType> op(isGPU, shapes);
+  op.set_verbose(false);
+
+  op.Init(op.ArgsWithOpName(op_kwargs, op_name, backward_op_name));
+
+  if (verbose) {
+    PRINT_NDARRAYS(op.ctx().run_ctx, op.inputs());
+    PRINT_NDARRAYS(op.ctx().run_ctx, op.outputs());
+  }
+  op.Execute();
+  if (verbose) {
+    PRINT_NDARRAYS(op.ctx().run_ctx, op.outputs());
+  }
+  if (op.HasBackward()) {
+    if (verbose) {
+      PRINT_NDARRAYS(op.ctx().run_ctx, op.bwd_inputs());
+      PRINT_NDARRAYS(op.ctx().run_ctx, op.bwd_outputs());
+    }
+    op.ExecuteBackward();
+    if (verbose) {
+      PRINT_NDARRAYS(op.ctx().run_ctx, op.bwd_outputs());
+    }
+  }
+}
+
 }  // namespace op
 }  // namespace test
 }  // namespace mxnet

diff --git a/tests/cpp/include/test_op_runner.h b/tests/cpp/include/test_op_runner.h
@@ -145,8 +145,14 @@ class OperatorRunner {
     std::stringstream ss;
     ss << "Timing: " << COUNT << " iterations of " << count << " calls";
     if (timing_shapes[0].ndim()) {
-      // TODO(cjolivier01): Print all shapes (if they differ)
-      ss << ", shape = " << timing_shapes[0] << std::endl << std::flush;
+      ss << ", shape = ";
+      for (size_t i = 0, n = timing_shapes.size(); i < n; ++i) {
+        if (i) {
+          ss << ", ";
+        }
+        ss << timing_shapes[i];
+      }
+      ss << std::endl << std::flush;
     }
     std::cout << ss.str();
 

diff --git a/tests/cpp/misc/memory_test.cc b/tests/cpp/misc/memory_test.cc
@@ -79,7 +79,7 @@ TEST(MEMORY_TEST, MemsetAndMemcopyPerformance) {
 
       start = test::perf::getNannoTickCount();
       #pragma omp parallel for num_threads(GetOMPThreadCount())
-      for (int i = 0; i < test_size; ++i) {
+      for (int i = 0; i < static_cast<int>(test_size); ++i) {
         src[i] = 42;
       }
       const uint64_t omp_set_time = test::perf::getNannoTickCount() - start;
@@ -94,7 +94,7 @@ TEST(MEMORY_TEST, MemsetAndMemcopyPerformance) {
 
       start = test::perf::getNannoTickCount();
       #pragma omp parallel for num_threads(GetOMPThreadCount())
-      for (int i = 0; i < test_size; ++i) {
+      for (int i = 0; i < static_cast<int>(test_size); ++i) {
         dest[i] = src[i];
       }
       const uint64_t omp_copy_time = test::perf::getNannoTickCount() - start;

diff --git a/tests/cpp/operator/batchnorm_test.cc b/tests/cpp/operator/batchnorm_test.cc
@@ -1424,7 +1424,7 @@ static TShape MakeShape(const std::vector<index_t>& shape,
   CHECK_LT(channelAxis, shape.size() + 1);
   const index_t dim = index_t(shape.size()) + 1;
   TShape newShape(dim);
-  for (size_t x = 0; x < channelAxis; ++x) {
+  for (size_t x = 0; x < static_cast<size_t>(channelAxis); ++x) {
     newShape[x] = index_t(shape[x]);
   }
   newShape[channelAxis] = index_t(channelCount);

diff --git a/tests/cpp/operator/broadcast_perf.cc b/tests/cpp/operator/broadcast_perf.cc
@@ -31,34 +31,35 @@ using namespace mxnet;
 
 using kwargs_t = test::op::kwargs_t;
 
-template<typename DType = float>
-static void RunCoreOpBidirectional(const bool isGPU,
-                                   const kwargs_t& op_kwargs,
-                                   const char *op_name,
-                                   const char *backward_op_name = "") {
-  const std::vector<TShape> shapes = { {2, 3}, {2, 1} };
-  test::op::CoreOpExecutor<DType> op(isGPU, shapes);
-  op.set_verbose(false);
-
-  op.Init(op.ArgsWithOpName(op_kwargs, op_name, backward_op_name));
-
-  PRINT_NDARRAYS(op.ctx().run_ctx, op.inputs());
-  PRINT_NDARRAYS(op.ctx().run_ctx, op.outputs());
-  op.Execute();
-  PRINT_NDARRAYS(op.ctx().run_ctx, op.outputs());
-  if (op.HasBackward()) {
-    PRINT_NDARRAYS(op.ctx().run_ctx, op.bwd_inputs());
-    PRINT_NDARRAYS(op.ctx().run_ctx, op.bwd_outputs());
-    op.ExecuteBackward();
-    PRINT_NDARRAYS(op.ctx().run_ctx, op.bwd_outputs());
-  }
-}
-
 /*!
  * \brief Generic bidirectional sanity test
  */
 TEST(BROADCAST_PERF, ExecuteBidirectional) {
-  RunCoreOpBidirectional(false, {}, "broadcast_add", "_backward_broadcast_add");
+  test::op::BasicRunCoreOpBidirectional(false, true, {},
+                                        { {2, 3}, {2, 1} },
+                                        "broadcast_add", "_backward_broadcast_add");
+}
+
+/*!
+ * \brief Build the shape sets used by the broadcast tests
+ * \return One entry per test case, each entry being the list of shapes for that case.
+ *         When test::performance_run is true a larger set is returned; otherwise a
+ *         smaller set is returned that acts as a sanity test.
+ */
+static const std::vector<std::vector<TShape>> broadcast_shapes() {
+  std::vector<std::vector<TShape>> shapes;
+  if (test::performance_run) {
+    shapes = {
+      { {28,  28},  {28, 1} },
+      { {64,  28},  {1, 28} },
+      { {28,  28, 28},  {28, 28, 1} },
+      { {128, 128}, {1, 128} },
+      { {1024, 12, 256}, {1024, 1, 1} },
+      { {2560, 1280}, {2560, 1} }
+    };
+  } else {
+    shapes = {
+      // Non-performance dataset acts as a sanity test
+      { {28,  28},  {28, 1} },
+      { {128, 128}, {128, 1} },
+      { {28,  28, 28},  {28, 28, 1} }
+    };
+  }
+  // Return the local by name: wrapping it in std::move() inhibits copy elision (NRVO)
+  return shapes;
 }
 
 template<typename DType = float>
@@ -74,20 +75,7 @@ static void RunCoreOpTimingTest(const bool isGPU,
   runner.RunBidirectional(false, { {2, 3}, {2, 1} }, kwargs, 1);
 
   // Do the performance runs
-  std::vector<std::vector<TShape>> shapes;
-  if (test::performance_run) {
-    shapes = {
-      { {28,  28},  {28, 1} },
-      { {18,  32} , {18, 1} },
-      { {128, 128}, {128, 1} },
-      { {2560, 1280}, {2560, 1} }
-    };
-  } else {
-    shapes = {
-      { {28,  28},  {28, 1} },
-      { {128, 128}, {128, 1} }
-    };
-  }
+  std::vector<std::vector<TShape>> shapes = broadcast_shapes();
   const char *pu = isGPU ? "GPU" : "CPU";
   for (const std::vector<TShape> &shape : shapes) {
     runner.TimingTest(std::string(op_name) + " Operator " + pu, isGPU, false, kwargs,