diff --git a/src/operator/nn/fully_connected-inl.h b/src/operator/nn/fully_connected-inl.h
index e4bb11f6bc56..44af375486fb 100644
--- a/src/operator/nn/fully_connected-inl.h
+++ b/src/operator/nn/fully_connected-inl.h
@@ -36,6 +36,7 @@
 #include "../elemwise_op_common.h"
 #include "../linalg.h"
 #include "../../common/utils.h"
+#include "../tensor/broadcast_reduce_op.h"
 
 namespace mxnet {
 namespace op {
@@ -169,7 +170,18 @@ void FCBackward(const OpContext &ctx, const FullyConnectedParam &param,
   // gradient of bias
   if (!param.no_bias) {
     Tensor<xpu, 1, DType> gbias = in_grad[fullc::kBias].get<xpu, 1, DType>(s);
-    Assign(gbias, req[fullc::kBias], sum_rows(grad));
+    TBlob grad_blob = TBlob(grad);
+    TBlob gbias_blob = TBlob(gbias);
+    mxnet::TShape x(1, 0);
+    mxnet::TShape small;
+    if (shape_assign(&gbias_blob.shape_, Shape2(param.num_hidden, 1))) {
+      small = gbias_blob.shape_;
+    } else {
+      small = ReduceAxesShapeImpl(grad_blob.shape_, dmlc::optional<mxnet::TShape>(x), true, false);
+    }
+    ReduceAxesComputeImpl<xpu, mshadow::red::sum, false, false,
+                          mshadow_op::identity>(ctx, {grad_blob}, {req[fullc::kBias]},
+                                                {in_grad[fullc::kBias]}, small);
   }
   // gradient of data
   // Legacy approach shown here for comparison:
diff --git a/src/operator/nn/fully_connected.cc b/src/operator/nn/fully_connected.cc
index a097357ef5a3..27f6595aee9e 100644
--- a/src/operator/nn/fully_connected.cc
+++ b/src/operator/nn/fully_connected.cc
@@ -316,11 +316,9 @@ NNVM_REGISTER_OP(_backward_FullyConnected)
   const FullyConnectedParam& params = nnvm::get<FullyConnectedParam>(attrs.parsed);
   return params.no_bias ? 2 : 3;
 })
-#if MXNET_USE_MKLDNN == 1
 .set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
   return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
 })
-#endif
 .set_attr<nnvm::TIsBackward>("TIsBackward", true)
 .set_attr<nnvm::FInplaceOption>("FInplaceOption", [](const NodeAttrs& attrs){
   return std::vector<std::pair<int, int> >{{1, 0}};
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 52fe69bbd434..2dd5fe3bcc9a 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -696,6 +696,27 @@ def test_symbol_pow():
     check_symbolic_backward(test, [data_tmp, exp_tmp], [np.ones(shape)], [data_dir, exp_dir])
 
 
+@with_seed()
+def test_fully_connected():
+    data = mx.sym.var("data")
+    fc_weight = mx.sym.var("weight")
+    fc_bias = mx.sym.var("bias")
+    fc = mx.sym.FullyConnected(data=data, weight=fc_weight, bias=fc_bias, num_hidden=10, no_bias=False, name='fc')
+    data = mx.nd.random.uniform(shape=(5, 5, 5, 13), dtype=np.float32)
+    fc_weight = mx.nd.random.uniform(shape=(10, 325), dtype=np.float32)
+    fc_bias = mx.nd.random.uniform(shape=(10), dtype=np.float32)
+    fc_bias2 = mx.nd.random.uniform(shape=(10, 1), dtype=np.float32)
+    data_np = data.asnumpy().reshape(5, 325)
+    fc_weight_np = np.transpose(fc_weight.asnumpy())
+    fc_bias_np = fc_bias.asnumpy()
+    res = np.dot(data_np, fc_weight_np) + fc_bias.asnumpy()
+    check_symbolic_forward(fc, {'data': data_np, 'weight': fc_weight.asnumpy(), 'bias': fc_bias_np}, {'fc_output': res})
+    check_numeric_gradient(fc, {'data': data_np, 'weight': fc_weight.asnumpy(), 'bias': fc_bias_np},
+                           numeric_eps=1e-2, rtol=1e-4, atol=1e-2)
+    # TODO: Fix Bug #15032 when bias has ndim > 1
+    #check_symbolic_forward(fc, {'data': data_np, 'weight': fc_weight.asnumpy(), 'bias': fc_bias2.asnumpy()}, {'fc_output': res})
+
+
 @with_seed()
 def test_pow_fn():
     shape = (3, 4)
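
A note on the bias-gradient hunk above: the old path used `sum_rows(grad)`, which assumes a strictly 1-D bias, while the new path builds a reduction target shape (`small`) and sums the output gradient over the batch axis via `ReduceAxesComputeImpl`, so it can also write into a bias declared as `(num_hidden, 1)`. Below is a minimal NumPy sketch of that reduction; the helper name and shapes are illustrative only, not part of the patch.

```python
import numpy as np

def fc_bias_grad(ograd, bias_shape):
    """Sketch: sum the output gradient over the batch axis, then lay the
    result out in whatever shape the bias variable was declared with.
    `ograd` is the (batch, num_hidden) gradient flowing into FCBackward."""
    gbias = ograd.sum(axis=0)            # shape (num_hidden,)
    return gbias.reshape(bias_shape)     # (num_hidden,) or (num_hidden, 1)

ograd = np.ones((5, 10), dtype=np.float32)   # batch of 5, num_hidden = 10
print(fc_bias_grad(ograd, (10,)))            # ten 5.0 values
print(fc_bias_grad(ograd, (10, 1)))          # same values as a column vector
```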
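For a quick end-to-end check of the fixed gradient path, here is a hedged sketch using MXNet's imperative `FullyConnected` op and `mx.autograd`; it is not part of the test added by this patch.

```python
import mxnet as mx
import numpy as np

data = mx.nd.random.uniform(shape=(5, 13))
weight = mx.nd.random.uniform(shape=(10, 13))
bias = mx.nd.random.uniform(shape=(10,))
for arr in (data, weight, bias):
    arr.attach_grad()

with mx.autograd.record():
    out = mx.nd.FullyConnected(data=data, weight=weight, bias=bias, num_hidden=10)
out.backward(mx.nd.ones_like(out))

# With a head gradient of all ones, the bias gradient is the column-wise sum,
# i.e. the batch size (5.0) for every hidden unit.
assert np.allclose(bias.grad.asnumpy(), np.full((10,), 5.0, dtype=np.float32))
```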