Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
Refreshed branch quick_pr (#8643)
Browse files Browse the repository at this point in the history
  • Loading branch information
cjolivier01 authored and piiswrong committed Nov 17, 2017
1 parent 4d05bff commit 3107326
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 45 deletions.
5 changes: 3 additions & 2 deletions src/operator/tensor/elemwise_binary_broadcast_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -249,8 +249,9 @@ void BinaryBroadcastBackwardUseIn(const nnvm::NodeAttrs& attrs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
TShape new_lshape, new_rshape, new_oshape;
bool need_bc = BinaryBroadcastShapeCompact(outputs[0].shape_, outputs[1].shape_, inputs[0].shape_,
&new_lshape, &new_rshape, &new_oshape);
const bool need_bc = BinaryBroadcastShapeCompact(outputs[0].shape_,
outputs[1].shape_, inputs[0].shape_,
&new_lshape, &new_rshape, &new_oshape) != 0;
if (!need_bc) {
ElemwiseBinaryOp::BackwardUseIn<xpu, LOP, ROP>(attrs, ctx, inputs, req, outputs);
} else {
Expand Down
44 changes: 44 additions & 0 deletions tests/cpp/include/test_core_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -611,6 +611,50 @@ class CoreOpProp {
/*!
 * \brief Alias for test::OperatorRunner instantiated with CoreOpProp and CoreOpExecutor<DType>
 * \tparam DType Data type forwarded to CoreOpExecutor
 */
template<typename DType>
using CoreOperatorRunner = test::OperatorRunner<CoreOpProp, CoreOpExecutor<DType>>;


/*!
 * \brief Run a core op forward and, if the executor reports a backward op, backward as well
 * \tparam DType Data type
 * \param isGPU true if operation is to be run on the GPU
 * \param verbose true to print the input/output arrays before and after each pass
 * \param op_kwargs Operator parameters
 * \param shapes Shapes passed to the CoreOpExecutor constructor
 * \param op_name Operator name as registered with nnvm
 * \param backward_op_name Backwards operator name as registered with nnvm
 *                         If blank, the runner will attempt to determine the backwards operator.
 *                         If it fails, an exception will be thrown.
 *                         If the string is [none], then no backward operator will be created
 *                         or executed
 */
template<typename DType = float>
inline void BasicRunCoreOpBidirectional(const bool isGPU,
                                        bool verbose,
                                        const kwargs_t& op_kwargs,
                                        const std::vector<TShape>& shapes,
                                        const char *op_name,
                                        const char *backward_op_name = "") {
  test::op::CoreOpExecutor<DType> op(isGPU, shapes);
  // The executor's verbose flag is switched off; the prints below are gated on `verbose` instead
  op.set_verbose(false);
  op.Init(op.ArgsWithOpName(op_kwargs, op_name, backward_op_name));

  // ---- Forward pass ----
  if (verbose) {
    PRINT_NDARRAYS(op.ctx().run_ctx, op.inputs());
    PRINT_NDARRAYS(op.ctx().run_ctx, op.outputs());
  }
  op.Execute();
  if (verbose) {
    PRINT_NDARRAYS(op.ctx().run_ctx, op.outputs());
  }

  // Nothing more to do when no backward operator is available
  if (!op.HasBackward()) {
    return;
  }

  // ---- Backward pass ----
  if (verbose) {
    PRINT_NDARRAYS(op.ctx().run_ctx, op.bwd_inputs());
    PRINT_NDARRAYS(op.ctx().run_ctx, op.bwd_outputs());
  }
  op.ExecuteBackward();
  if (verbose) {
    PRINT_NDARRAYS(op.ctx().run_ctx, op.bwd_outputs());
  }
}

} // namespace op
} // namespace test
} // namespace mxnet
Expand Down
10 changes: 8 additions & 2 deletions tests/cpp/include/test_op_runner.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,14 @@ class OperatorRunner {
std::stringstream ss;
ss << "Timing: " << COUNT << " iterations of " << count << " calls";
if (timing_shapes[0].ndim()) {
// TODO(cjolivier01): Print all shapes (if they differ)
ss << ", shape = " << timing_shapes[0] << std::endl << std::flush;
ss << ", shape = ";
for (size_t i = 0, n = timing_shapes.size(); i < n; ++i) {
if (i) {
ss << ", ";
}
ss << timing_shapes[i];
}
ss << std::endl << std::flush;
}
std::cout << ss.str();

Expand Down
4 changes: 2 additions & 2 deletions tests/cpp/misc/memory_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ TEST(MEMORY_TEST, MemsetAndMemcopyPerformance) {

start = test::perf::getNannoTickCount();
#pragma omp parallel for num_threads(GetOMPThreadCount())
for (int i = 0; i < test_size; ++i) {
for (int i = 0; i < static_cast<int>(test_size); ++i) {
src[i] = 42;
}
const uint64_t omp_set_time = test::perf::getNannoTickCount() - start;
Expand All @@ -94,7 +94,7 @@ TEST(MEMORY_TEST, MemsetAndMemcopyPerformance) {

start = test::perf::getNannoTickCount();
#pragma omp parallel for num_threads(GetOMPThreadCount())
for (int i = 0; i < test_size; ++i) {
for (int i = 0; i < static_cast<int>(test_size); ++i) {
dest[i] = src[i];
}
const uint64_t omp_copy_time = test::perf::getNannoTickCount() - start;
Expand Down
2 changes: 1 addition & 1 deletion tests/cpp/operator/batchnorm_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1424,7 +1424,7 @@ static TShape MakeShape(const std::vector<index_t>& shape,
CHECK_LT(channelAxis, shape.size() + 1);
const index_t dim = index_t(shape.size()) + 1;
TShape newShape(dim);
for (size_t x = 0; x < channelAxis; ++x) {
for (size_t x = 0; x < static_cast<size_t>(channelAxis); ++x) {
newShape[x] = index_t(shape[x]);
}
newShape[channelAxis] = index_t(channelCount);
Expand Down
64 changes: 26 additions & 38 deletions tests/cpp/operator/broadcast_perf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,34 +31,35 @@ using namespace mxnet;

using kwargs_t = test::op::kwargs_t;

template<typename DType = float>
static void RunCoreOpBidirectional(const bool isGPU,
const kwargs_t& op_kwargs,
const char *op_name,
const char *backward_op_name = "") {
const std::vector<TShape> shapes = { {2, 3}, {2, 1} };
test::op::CoreOpExecutor<DType> op(isGPU, shapes);
op.set_verbose(false);

op.Init(op.ArgsWithOpName(op_kwargs, op_name, backward_op_name));

PRINT_NDARRAYS(op.ctx().run_ctx, op.inputs());
PRINT_NDARRAYS(op.ctx().run_ctx, op.outputs());
op.Execute();
PRINT_NDARRAYS(op.ctx().run_ctx, op.outputs());
if (op.HasBackward()) {
PRINT_NDARRAYS(op.ctx().run_ctx, op.bwd_inputs());
PRINT_NDARRAYS(op.ctx().run_ctx, op.bwd_outputs());
op.ExecuteBackward();
PRINT_NDARRAYS(op.ctx().run_ctx, op.bwd_outputs());
}
}

/*!
 * \brief Generic bidirectional sanity test
 *        Runs "broadcast_add" together with "_backward_broadcast_add" with isGPU = false
 *        and empty operator kwargs.
 */
TEST(BROADCAST_PERF, ExecuteBidirectional) {
// File-local runner; it uses its own built-in { {2, 3}, {2, 1} } shapes
RunCoreOpBidirectional(false, {}, "broadcast_add", "_backward_broadcast_add");
// Shared runner: isGPU = false, verbose = true, empty kwargs, explicit input shapes
test::op::BasicRunCoreOpBidirectional(false, true, {},
{ {2, 3}, {2, 1} },
"broadcast_add", "_backward_broadcast_add");
}

/*!
 * \brief Build the list of shape sets used by the broadcast tests
 * \return One entry per run, each entry being the shapes for that run.
 *         When test::performance_run is set, the larger performance list is returned;
 *         otherwise a shorter list is returned that acts as a sanity test.
 */
static const std::vector<std::vector<TShape>> broadcast_shapes() {
  std::vector<std::vector<TShape>> shapes;
  if (test::performance_run) {
    shapes = {
      { {28, 28}, {28, 1} },
      { {64, 28}, {1, 28} },
      { {28, 28, 28}, {28, 28, 1} },
      { {128, 128}, {1, 128} },
      { {1024, 12, 256}, {1024, 1, 1} },
      { {2560, 1280}, {2560, 1} }
    };
  } else {
    shapes = {
      // Non-performance dataset acts as a sanity test
      { {28, 28}, {28, 1} },
      { {128, 128}, {128, 1} },
      { {28, 28, 28}, {28, 28, 1} }
    };
  }
  // Return the local by name: wrapping it in std::move() prevents copy elision (NRVO)
  // and triggers -Wpessimizing-move; a plain return is elided or implicitly moved.
  return shapes;
}

template<typename DType = float>
Expand All @@ -74,20 +75,7 @@ static void RunCoreOpTimingTest(const bool isGPU,
runner.RunBidirectional(false, { {2, 3}, {2, 1} }, kwargs, 1);

// Do the performance runs
std::vector<std::vector<TShape>> shapes;
if (test::performance_run) {
shapes = {
{ {28, 28}, {28, 1} },
{ {18, 32} , {18, 1} },
{ {128, 128}, {128, 1} },
{ {2560, 1280}, {2560, 1} }
};
} else {
shapes = {
{ {28, 28}, {28, 1} },
{ {128, 128}, {128, 1} }
};
}
std::vector<std::vector<TShape>> shapes = broadcast_shapes();
const char *pu = isGPU ? "GPU" : "CPU";
for (const std::vector<TShape> &shape : shapes) {
runner.TimingTest(std::string(op_name) + " Operator " + pu, isGPU, false, kwargs,
Expand Down

0 comments on commit 3107326

Please sign in to comment.