Optimize move semantics of NodeEntry
apache/tvm#2576
Making copies of a shared_ptr is very expensive, roughly 100x the cost of a move, because every copy updates the atomic reference count. This PR reduces that contention by using move semantics when constructing NNVM nodes.
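As a rough, standalone illustration of the cost difference the message cites (not part of this commit; the benchmark names and iteration count are arbitrary), the sketch below copies a std::shared_ptr in one loop and moves it in another. Each copy performs an atomic reference-count increment plus a matching decrement, while a move only transfers the raw pointer.

// Standalone sketch (not from this commit): copying vs. moving a std::shared_ptr.
// A copy does an atomic ref-count increment (and a matching decrement when the
// copy dies); a move just transfers ownership of the pointer.
#include <chrono>
#include <iostream>
#include <memory>
#include <utility>

int main() {
  constexpr int kIters = 10000000;          // arbitrary iteration count
  auto node = std::make_shared<int>(42);    // stands in for an NNVM node

  auto t0 = std::chrono::steady_clock::now();
  for (int i = 0; i < kIters; ++i) {
    std::shared_ptr<int> copy = node;       // atomic increment + decrement
    (void)copy;
  }
  auto t1 = std::chrono::steady_clock::now();

  std::shared_ptr<int> a = node;
  std::shared_ptr<int> b;
  for (int i = 0; i < kIters; ++i) {
    b = std::move(a);                       // no ref-count traffic
    a = std::move(b);                       // move it back for the next round
  }
  auto t2 = std::chrono::steady_clock::now();

  using ms = std::chrono::duration<double, std::milli>;
  std::cout << "copy loop: " << ms(t1 - t0).count() << " ms\n"
            << "move loop: " << ms(t2 - t1).count() << " ms\n";
  return 0;
}

Actual ratios vary with compiler, hardware, and cross-thread sharing of the control block; the point is only that moves avoid the atomic operations entirely.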
Pedro Larroy authored and larroy committed Apr 16, 2019
1 parent b1fc3ec commit e380e41
Showing 11 changed files with 27 additions and 28 deletions.
2 changes: 1 addition & 1 deletion .gitmodules
@@ -22,7 +22,7 @@
   branch = master
 [submodule "3rdparty/tvm"]
   path = 3rdparty/tvm
-  url = https://github.com/dmlc/tvm
+  url = https://github.com/larroy/tvm
 [submodule "3rdparty/onnx-tensorrt"]
   path = 3rdparty/onnx-tensorrt
   url = https://github.com/onnx/onnx-tensorrt.git
2 changes: 1 addition & 1 deletion 3rdparty/tvm
2 changes: 1 addition & 1 deletion src/c_api/c_api_function.cc
@@ -56,7 +56,7 @@ std::vector<nnvm::NodeEntry> Gradient(
 
   std::vector<nnvm::NodeEntry> ret;
   for (uint32_t i = 0; i < g->num_outputs(); ++i) {
-    ret.emplace_back(nnvm::NodeEntry{g, i, 0});
+    ret.emplace_back(g, i, 0);
   }
 
   return ret;
8 changes: 4 additions & 4 deletions src/executor/graph_executor.cc
@@ -147,7 +147,7 @@ nnvm::NodeEntry AggregateGradient(std::vector<nnvm::NodeEntry>&& v) {
     ng->attrs.op = Op::Get("_zeros_without_dtype");
     ng->attrs.name = "zeros_without_dtype";
     ng->attrs.op->attr_parser(&(ng->attrs));
-    return nnvm::NodeEntry{ng, 0, 0};
+    return nnvm::NodeEntry(std::move(ng), 0, 0);
   }
 
   // remove zero in the sum. at least keep 1.
@@ -168,7 +168,7 @@ nnvm::NodeEntry AggregateGradient(std::vector<nnvm::NodeEntry>&& v) {
       sum_node->attrs.dict["num_args"] = std::to_string(v.size());
       sum_node->attrs.op->attr_parser(&(sum_node->attrs));
       sum_node->inputs = std::move(v);
-      return nnvm::NodeEntry{sum_node, 0, 0};
+      return nnvm::NodeEntry(std::move(sum_node), 0, 0);
     } else {
       // use a stream line of plus instead
       nnvm::NodeEntry ret = v[0];
@@ -198,7 +198,7 @@ nnvm::NodeEntry AggregateGradient(std::vector<nnvm::NodeEntry>&& v) {
         x->attrs.op = ewise_plus_op;
         x->attrs.name = os.str();
         x->inputs = {ret, v[i]};
-        ret = nnvm::NodeEntry{x, 0, 0};
+        ret = nnvm::NodeEntry(std::move(x), 0, 0);
       }
       // identity node is used to avoid exposure of dummy plus node
       // when its output get assigned to another space.
@@ -247,7 +247,7 @@ nnvm::Graph GraphExecutor::InitFullGraph(nnvm::Symbol symbol,
   }
   if (!need_grad_) return g;
   for (size_t i = 0; i < g.outputs.size(); ++i) {
-    NodeEntry ngrad{nnvm::Node::Create(), 0, 0};
+    NodeEntry ngrad;
     head_grad_entry_.emplace_back(AttrHint(ngrad, g.outputs[i]));
     head_grad_map_[ngrad.node.get()] = i;
   }
4 changes: 2 additions & 2 deletions src/imperative/cached_op.cc
@@ -160,7 +160,7 @@ CachedOp::CachedOp(
   {
     ograd_entries_.reserve(fwd_graph_.outputs.size());
     for (size_t i = 0; i < fwd_graph_.outputs.size(); ++i) {
-      ograd_entries_.emplace_back(NodeEntry{Node::Create(), 0, 0});
+      ograd_entries_.emplace_back();
     }
 
     std::vector<NodeEntry> xs;
@@ -169,7 +169,7 @@
       auto nid = idx.input_nodes()[i];
       if (idx.mutable_input_nodes().count(nid)) continue;
       fwd_input_to_grad_output_[i] = xs.size();
-      xs.emplace_back(NodeEntry{idx[nid].weak_ref.lock(), 0, 0});
+      xs.emplace_back(idx[nid].weak_ref.lock(), 0, 0);
     }
 
     CHECK_GT(xs.size(), 0)
2 changes: 1 addition & 1 deletion src/imperative/imperative.cc
@@ -305,7 +305,7 @@ std::vector<NDArray*> Imperative::Backward(
   std::vector<NodeEntry> ograd_entries;
   ograd_entries.reserve(ograds.size());
   for (size_t i = 0; i < outputs.size(); ++i) {
-    ograd_entries.emplace_back(NodeEntry{Node::Create(), 0, 0});
+    ograd_entries.emplace_back();
     AGInfo& info = AGInfo::Create(ograd_entries.back().node);
     info.ctx = outputs[i]->ctx();
     if (ograds[i] != nullptr) {
23 changes: 11 additions & 12 deletions src/nnvm/legacy_op_util.cc
@@ -321,17 +321,18 @@ inline std::vector<NodeEntry> OpPropGradient(
     const NodePtr& ptr,
     const std::vector<NodeEntry>& out_grads) {
   auto& prop = nnvm::get<ParsedOpProp>(ptr->attrs.parsed);
-  std::vector<NodeEntry> out_data(prop.outputs.size());
-  for (uint32_t i = 0; i < out_data.size(); ++i) {
-    out_data[i] = NodeEntry{ptr, i, 0};
+  std::vector<NodeEntry> out_data;
+  out_data.reserve(prop.outputs.size());
+  for (size_t i = 0; i < prop.outputs.size(); ++i) {
+    out_data.emplace_back(ptr, i, 0);
   }
   std::vector<NodeEntry> in_data(
       ptr->inputs.begin(), ptr->inputs.begin() + prop.arguments.size());
   std::vector<NodeEntry> ograd(
       out_grads.begin(), out_grads.begin() + prop.ptr->NumVisibleOutputs());
   auto inputs = prop.ptr->BackwardInputs(ograd, in_data, out_data);
   // add all the auxiliary data
-  for (uint32_t i = 0; i < prop.aux_states.size(); ++i) {
+  for (size_t i = 0; i < prop.aux_states.size(); ++i) {
     inputs.emplace_back(ptr->inputs[i + prop.arguments.size()]);
   }
   NodePtr gnode = Node::Create();
@@ -340,17 +340,15 @@
   gnode->attrs = ptr->attrs;
   gnode->attrs.op = back_op;
   gnode->attrs.name = ptr->attrs.name + "_backward";
-  std::vector<NodeEntry> in_grad(prop.arguments.size());
-  for (uint32_t i = 0; i < prop.arguments.size(); ++i) {
-    in_grad[i] = NodeEntry{gnode, i, 0};
+  std::vector<NodeEntry> in_grad;
+  in_grad.reserve(prop.arguments.size() + prop.aux_states.size());
+  for (size_t i = 0; i < prop.arguments.size(); ++i) {
+    in_grad.emplace_back(gnode, i, 0);
   }
   // attach no gradient node to forbid gradient on aux_state
   if (prop.aux_states.size() != 0) {
-    NodePtr ng = Node::Create();
-    ng->attrs.op = Op::Get("_NoGradient");
-    ng->attrs.name = "NoGradient";
-    for (uint32_t i = 0; i < prop.aux_states.size(); ++i) {
-      in_grad.emplace_back(NodeEntry{ng, 0, 0});
+    for (size_t i = 0; i < prop.aux_states.size(); ++i) {
+      in_grad.emplace_back(Node::Create(Op::Get("_NoGradient"), "NoGradient"), 0, 0);
     }
   }
   return in_grad;
2 changes: 1 addition & 1 deletion src/operator/custom/custom.cc
@@ -224,7 +224,7 @@ std::vector<nnvm::NodeEntry> Gradient(
     size_t i = static_cast<size_t>(t);
     if (i >= params.num_outs + params.num_args) {
       uint32_t idx = static_cast<uint32_t>(i-params.num_outs-params.num_args);
-      g->inputs.push_back(nnvm::NodeEntry{n, idx, 0});
+      g->inputs.emplace_back(n, idx, 0);
     } else if (i >= params.num_outs) {
       g->inputs.push_back(n->inputs[i-params.num_outs]);
     } else {
2 changes: 1 addition & 1 deletion src/operator/nn/lrn.cc
@@ -77,7 +77,7 @@ struct LRNGrad {
     std::vector<nnvm::NodeEntry> heads;
     heads.push_back(ograds[0]);  // out_grad
     heads.push_back(n->inputs[lrn_enum::kData]);
-    heads.emplace_back(nnvm::NodeEntry{n, lrn_enum::kTmpNorm, 0});
+    heads.emplace_back(n, lrn_enum::kTmpNorm, 0);
     return MakeGradNode(op_name, n, heads, n->attrs.dict);
   }
 };
2 changes: 1 addition & 1 deletion src/operator/operator_common.h
@@ -447,7 +447,7 @@ inline std::vector<nnvm::NodeEntry> MakeNonlossGradNode(
   p->inputs.insert(p->inputs.end(), inputs.begin(), inputs.end());
   std::vector<nnvm::NodeEntry> ret;
   for (uint32_t i = 0; i < p->num_outputs(); ++i) {
-    ret.emplace_back(nnvm::NodeEntry{p, i, 0});
+    ret.emplace_back(p, i, 0);
   }
   return ret;
 }
6 changes: 3 additions & 3 deletions src/operator/tensor/elemwise_sum.cc
@@ -49,12 +49,12 @@ std::vector<nnvm::NodeEntry> ElementWiseSumGrad(
       nnvm::Op::Get("identity");
   CHECK_EQ(ograds.size(), 1);
   std::vector<nnvm::NodeEntry> ret;
-  nnvm::NodeEntry n_out{n, 0, 0};
-  for (size_t i = 0; i < n->inputs.size(); i++) {
+  nnvm::NodeEntry n_out(n, 0, 0);
+  for (size_t i = 0; i < n->inputs.size(); ++i) {
     nnvm::NodePtr id_node = nnvm::Node::Create();
     id_node->attrs.op = copy_op;
     id_node->inputs = {ograds[0]};
-    ret.push_back(nnvm::NodeEntry{id_node, 0, 0});
+    ret.emplace_back(id_node, 0, 0);
   }
   return ret;
 }
