
[fix] missing input log higher order. #15331

Merged
merged 13 commits on Nov 19, 2019
12 changes: 12 additions & 0 deletions src/nnvm/node_op_util.h
@@ -68,6 +68,18 @@ class NodeOpGen {
dependent_node->attrs.name + "_square",
{x}, nullptr, &dependent_node)};
}

nnvm::NodeEntry reciprocal(const nnvm::NodeEntry &x) {
return nnvm::NodeEntry{mxnet::op::MakeNode("reciprocal",
dependent_node->attrs.name + "_reciprocal",
{x}, nullptr, &dependent_node)};
}

nnvm::NodeEntry negative(const nnvm::NodeEntry &x) {
return nnvm::NodeEntry{mxnet::op::MakeNode("negative",
dependent_node->attrs.name + "_negative",
{x}, nullptr, &dependent_node)};
}
};

} // namespace util
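
The two new members mirror the existing mul/div/square helpers: each wraps MakeNode and threads the dependent node's name into the generated op. A minimal sketch of how they compose (the free function below is hypothetical, not part of this PR; it assumes a NodeOpGen already constructed from the node being differentiated, exactly as the FGradient lambdas further down do):

```cpp
#include <nnvm/node.h>
#include "../nnvm/node_op_util.h"  // include path as used in the .cc below

// Hypothetical helper: builds the subgraph (-1/x) * g out of the new
// reciprocal/negative members plus the existing mul.
nnvm::NodeEntry NegReciprocalTimes(mxnet::util::NodeOpGen* op,
                                   const nnvm::NodeEntry& x,
                                   const nnvm::NodeEntry& g) {
  nnvm::NodeEntry recip = op->reciprocal(x);  // 1/x
  nnvm::NodeEntry neg = op->negative(recip);  // -1/x
  return op->mul(neg, g);                     // (-1/x) * g
}
```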
73 changes: 33 additions & 40 deletions src/operator/tensor/elemwise_unary_op_logexp.cc
@@ -25,6 +25,7 @@
#include "elemwise_unary_op.h"
#include "./elemwise_binary_op-inl.h"
#include "../nn/mkldnn/mkldnn_ops-inl.h"
#include "../../nnvm/node_op_util.h"

namespace mxnet {
namespace op {
@@ -110,25 +111,23 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_log,
.set_attr<nnvm::FGradient>("FGradient",
[](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
// ograds[0]: dL/dxgrad
// inputs[0]: dL/dy (ygrad)
// inputs[1]: x (ElemwiseGradUseIn)
// f(x) = y = log(x)
// f'(x) = 1/x
// f''(x) = -1 * (f'(x) * f'(x))
auto x = n->inputs[1];
auto dydx_mul_dldy = nnvm::NodeEntry{n}; // f'(x) * head_grads
auto op = mxnet::util::NodeOpGen{n};

auto dlogx = op.reciprocal(x);
auto d2ydx2_mid = op.mul(dydx_mul_dldy, dlogx);
auto d2ydx2 = op.negative(d2ydx2_mid);

std::vector<nnvm::NodeEntry> ret;
ret.emplace_back(op.mul(ograds[0], dlogx));
ret.emplace_back(op.mul(ograds[0], d2ydx2));

return ret;
});
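
Written out, the subgraph above encodes the chain for the natural log (this restates the comments in the code; here $\bar{g}$ is ograds[0] and $\partial L/\partial y$ is inputs[0]):

```latex
f(x) = \log x, \qquad f'(x) = \frac{1}{x}, \qquad
f''(x) = -\frac{1}{x^{2}} = -f'(x)\,f'(x)
```

so the two returned entries are $\mathrm{ret}[0] = \bar{g}\,f'(x)$ (gradient w.r.t. the incoming head gradient) and $\mathrm{ret}[1] = \bar{g}\,\frac{\partial L}{\partial y}\,f''(x)$ (gradient w.r.t. x).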

Expand All @@ -137,27 +136,24 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_log10,
.set_attr<nnvm::FGradient>("FGradient",
[](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
// ograds[0]: dL/dxgrad
// inputs[0]: dL/dy (ygrad)
// inputs[1]: x (ElemwiseGradUseIn)
// f(x) = y = log10(x)
// f'(x) = 1 / (log(10) * x)
// f''(x) = -1 * (f'(x) * 1/x)
auto dldy = n->inputs[0];
auto x = n->inputs[1];
auto dydx_mul_dldy = nnvm::NodeEntry{n}; // f'(x) * head_grads
auto op = mxnet::util::NodeOpGen{n};
auto dydx = op.div(dydx_mul_dldy, dldy);
auto dlogx = op.reciprocal(x);
auto d2ydx2_mid = op.mul(dydx_mul_dldy, dlogx);
auto d2ydx2 = op.negative(d2ydx2_mid);

std::vector<nnvm::NodeEntry> ret;
ret.emplace_back(op.mul(ograds[0], dydx));
ret.emplace_back(op.mul(ograds[0], d2ydx2));

return ret;
});
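
A note on the division (and the reason dldy is now pulled in explicitly, which is the missing input this PR fixes): the node n already computes dydx_mul_dldy = f'(x) * dL/dy, so dividing by dL/dy recovers f'(x) without re-materializing the 1/ln(10) constant:

```latex
\texttt{dydx} = \frac{f'(x)\,\partial L/\partial y}{\partial L/\partial y}
             = \frac{1}{x \ln 10},
\qquad
f''(x) = -\frac{1}{x^{2}\ln 10} = f'(x)\cdot\left(-\frac{1}{x}\right)
```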

Expand All @@ -166,27 +162,24 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_log2,
.set_attr<nnvm::FGradient>("FGradient",
[](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
// ograds[0]: dL/dxgrad
// inputs[0]: dL/dy (ygrad)
// inputs[1]: x (ElemwiseGradUseIn)
// f(x) = y = log2(x)
// f'(x) = 1 / (log(2) * x)
// f''(x) = -1 * (f'(x) * 1/x)
auto dldy = n->inputs[0];
auto x = n->inputs[1];
auto dydx_mul_dldy = nnvm::NodeEntry{n}; // f'(x) * head_grads
auto op = mxnet::util::NodeOpGen{n};
auto dydx = op.div(dydx_mul_dldy, dldy);
auto dlogx = op.reciprocal(x);
auto d2ydx2_mid = op.mul(dydx_mul_dldy, dlogx);
auto d2ydx2 = op.negative(d2ydx2_mid);

std::vector<nnvm::NodeEntry> ret;
ret.emplace_back(op.mul(ograds[0], dydx));
ret.emplace_back(op.mul(ograds[0], d2ydx2));

return ret;
});
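
_backward_log2 is the same construction with ln 2 in place of ln 10:

```latex
f'(x) = \frac{1}{x \ln 2}, \qquad
f''(x) = -\frac{1}{x^{2}\ln 2} = f'(x)\cdot\left(-\frac{1}{x}\right)
```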
