Commit

[DoubleGrad] Enabled test_imperative_triple_grad test cases under eager_mode (#41612)

* [DoubleGrad] Enabled double grad test cases in eager_mode for test_imperative_double_grad

* Fixed elementwise issue

* Addressed CI failures

* [DoubleGrad] Enabled test_imperative_triple_grad test cases under eager_mode

* Fixed minor issues
jim19930609 committed Apr 18, 2022
1 parent a367fba commit 935e717
Showing 13 changed files with 81 additions and 55 deletions.
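At a high level, the changes below switch the double/triple-grad tests over to eager mode and rework the TensorWrapper / RecoverTensorWrapper machinery they rely on. For orientation, here is a minimal sketch of the kind of case test_imperative_triple_grad exercises under the eager guard — assuming Paddle 2.x dygraph APIs; the sigmoid example, shapes, and import path are illustrative, not the actual test code:

    # Illustrative sketch only: chain paddle.grad three times with
    # create_graph=True so the *_double_grad / *_triple_grad kernels
    # touched by this PR get exercised.
    import paddle
    from paddle.fluid.framework import _test_eager_guard  # assumed import path

    def triple_grad_sketch():
        x = paddle.rand([4, 4])
        x.stop_gradient = False
        y = paddle.sigmoid(x)

        # First-order grad; keep the graph so it can be differentiated again.
        dx = paddle.grad([y], [x], create_graph=True)[0]
        # Second-order grad (backed by sigmoid_double_grad).
        ddx = paddle.grad([dx.sum()], [x], create_graph=True)[0]
        # Third-order grad (backed by sigmoid_triple_grad).
        dddx = paddle.grad([ddx.sum()], [x], create_graph=False)[0]
        return dddx

    # Run under the eager guard so the new eager-mode autograd path is used.
    with _test_eager_guard():
        print(triple_grad_sketch().shape)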
11 changes: 5 additions & 6 deletions paddle/fluid/eager/auto_code_generator/eager_generator.cc
@@ -2011,8 +2011,7 @@ static std::string GenerateSingleOpBase(
"egr::EagerUtils::TrySyncToVars(egr::EagerUtils::"
"RecoverTensorWrapper("
"&"
"this->%s, "
"nullptr)) },";
"this->%s)) },";
ins_contents_str +=
paddle::string::Sprintf(GRAD_INS_FWD_CONTENT_TEMPLATE,
grad_input_name, struct_fwd_input_name);
@@ -2058,15 +2057,15 @@ static std::string GenerateSingleOpBase(
const char* DISPENSABLE_GRAD_INS_FWD_CONTENT_TEMPLATE =
" if(this->%s.size() > 0) %s[\"%s\"] = "
"egr::EagerUtils::TrySyncToVars(egr::EagerUtils::"
"RecoverTensorWrapper(&this->%s, nullptr));\n";
"RecoverTensorWrapper(&this->%s));\n";
generated_grad_function_body += paddle::string::Sprintf(
DISPENSABLE_GRAD_INS_FWD_CONTENT_TEMPLATE, struct_fwd_input_name,
ins_name, grad_input_name, struct_fwd_input_name);
} else {
const char* DISPENSABLE_GRAD_INS_FWD_CONTENT_TEMPLATE =
" auto %s = egr::EagerUtils::RecoverTensorWrapper(&this->%s, "
"nullptr);\n if(%s.initialized()) %s[\"%s\"] = "
"egr::EagerUtils::TrySyncToVars(%s);\n";
" auto %s = egr::EagerUtils::RecoverTensorWrapper(&this->%s);\n"
" if(%s.initialized()) %s[\"%s\"] = "
" egr::EagerUtils::TrySyncToVars(%s);\n";
generated_grad_function_body += paddle::string::Sprintf(
DISPENSABLE_GRAD_INS_FWD_CONTENT_TEMPLATE, grad_input_name,
struct_fwd_input_name, grad_input_name, ins_name, grad_input_name,
@@ -23,7 +23,8 @@
########################
ops_to_fill_zero_for_empty_grads = set([
"split_grad", "rnn_grad", "matmul_double_grad", "matmul_triple_grad",
"sigmoid_triple_grad, add_double_grad"
"sigmoid_double_grad", "sigmoid_triple_grad", "add_double_grad",
"add_triple_grad"
])

# For API dispatch used at python-level
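The ops_to_fill_zero_for_empty_grads change above also fixes a subtle bug: in the old code, "sigmoid_triple_grad, add_double_grad" was a single comma-separated string, so neither op name was actually a member of the set and their empty grads were never zero-filled. A quick illustration of the difference:

    # Illustrative only: membership behaves very differently for the two forms.
    old_set = {"split_grad", "rnn_grad", "matmul_double_grad", "matmul_triple_grad",
               "sigmoid_triple_grad, add_double_grad"}   # one malformed entry
    new_set = {"split_grad", "rnn_grad", "matmul_double_grad", "matmul_triple_grad",
               "sigmoid_double_grad", "sigmoid_triple_grad", "add_double_grad",
               "add_triple_grad"}

    print("add_double_grad" in old_set)   # False -- the lookup silently misses
    print("add_double_grad" in new_set)   # True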
@@ -236,7 +236,7 @@ class {} : public egr::GradNodeBase {{
{}
// SetAttributes
{}
// SetTensorWrappers
// Set TensorWrappers for Forward Inputs
{}
// SetGradOutMeta & SetEdges
{}
@@ -245,6 +245,8 @@ class {} : public egr::GradNodeBase {{
{}
{}
{}
{}
// Set TensorWrappers for Forward Outputs
{}
}}
"""
@@ -720,7 +722,8 @@ def GenerateNodeCreationCodes(self):
set_attributes_str = "\n".join(set_attributes_list)

# SetTensorWrappers
set_tensor_wrappers_list = []
set_input_tensor_wrappers_list = []
set_output_tensor_wrappers_list = []
num_fwd_outputs = len(forward_outputs_position_map.keys())
for name, (atype, is_fwd_input,
pos) in backward_forward_inputs_map.items():
@@ -732,6 +735,7 @@ def GenerateNodeCreationCodes(self):
set_tensor_wrappers = f"{indent}if({name}.get_ptr() != nullptr) grad_node->SetTensorWrapper{name}(*({name}.get_ptr()), true);"
else:
set_tensor_wrappers = f"{indent}grad_node->SetTensorWrapper{name}({name}, {need_input_data});"
set_input_tensor_wrappers_list.append(set_tensor_wrappers)
else:
if num_fwd_outputs > 1:
# Aligned with forward output position
@@ -743,8 +747,11 @@ def GenerateNodeCreationCodes(self):
set_tensor_wrappers = f"{indent}if({name}.get_ptr() != nullptr) grad_node->SetTensorWrapper{name}(*({name}.get_ptr()), false);"
else:
set_tensor_wrappers = f"{indent}grad_node->SetTensorWrapper{name}({name}, false);"
set_tensor_wrappers_list.append(set_tensor_wrappers)
set_tensor_wrappers_str = "\n".join(set_tensor_wrappers_list)
set_output_tensor_wrappers_list.append(set_tensor_wrappers)
set_input_tensor_wrappers_str = "\n".join(
set_input_tensor_wrappers_list)
set_output_tensor_wrappers_str = "\n".join(
set_output_tensor_wrappers_list)

# SetGradOutMeta & SetEdges
set_grad_out_meta_list = []
@@ -801,9 +808,10 @@

self.node_creation_str = FORWARD_BODY_TEMPLATE.format(
node_creation_event_str, pass_stop_gradient_args_str,
node_construction_str, set_attributes_str, set_tensor_wrappers_str,
set_grad_out_meta_str, set_edges_str, set_out_rank_str,
set_history_str, set_grad_in_meta_str, set_retain_grad_str)
node_construction_str, set_attributes_str,
set_input_tensor_wrappers_str, set_grad_out_meta_str, set_edges_str,
set_out_rank_str, set_history_str, set_grad_in_meta_str,
set_retain_grad_str, set_output_tensor_wrappers_str)

def run(self):
# Basic Validation Check
@@ -1296,7 +1304,7 @@ def GenerateNodeDefinition(self, grad_node_creation_str):
transformed_tensor_name = self.TransformToNextGradName(name)

is_optional = (name in self.optional_inputs)
tensor_wrapper_recover_str = f"{indent}auto {transformed_tensor_name} = egr::EagerUtils::RecoverTensorWrapper(&this->{tensor_wrapper_name}, this->shared_from_this());"
tensor_wrapper_recover_str = f"{indent}auto {transformed_tensor_name} = egr::EagerUtils::RecoverTensorWrapper(&this->{tensor_wrapper_name});"
if is_optional:
tensor_wrapper_recover_str += "\n" + CREATE_RECOVER_OPTIONAL_TENSOR_TEMPLATE.format(
transformed_tensor_name, transformed_tensor_name,
21 changes: 11 additions & 10 deletions paddle/fluid/eager/backward.cc
@@ -731,16 +731,6 @@ std::vector<paddle::experimental::Tensor> RunBackward(
continue;
}

auto* next_node = next_node_shared.get();
if (!node_input_buffers_dict.count(next_node)) {
const auto& input_meta = next_node->InputMeta();
auto grad_tensor_holder =
std::make_unique<GradTensorHolder>(input_meta);
VLOG(6) << "Construct GradTensorHolder for grad node: "
<< next_node->name();
node_input_buffers_dict[next_node] = std::move(grad_tensor_holder);
}

PADDLE_ENFORCE_LT(
j, grad_output_tensors[i].size(),
paddle::platform::errors::Fatal(
@@ -760,8 +750,19 @@
<< ", rank: " << j
<< " 's name is: " << grad_output_tensor.name();

auto* next_node = next_node_shared.get();
if (!node_input_buffers_dict.count(next_node)) {
const auto& input_meta = next_node->InputMeta();
auto grad_tensor_holder =
std::make_unique<GradTensorHolder>(input_meta);
VLOG(6) << "Construct GradTensorHolder for grad node: "
<< next_node->name();
node_input_buffers_dict[next_node] = std::move(grad_tensor_holder);
}

VLOG(6) << "Sum grad inputs for edge slot: " << edge_rank.first
<< ", rank: " << edge_rank.second;

node_input_buffers_dict[next_node]->add(
edge_rank.first, edge_rank.second, grad_output_tensor);

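The RunBackward change above is a reordering rather than new logic: the per-node GradTensorHolder is now constructed lazily, right before the first validated gradient is accumulated into it, instead of earlier in the loop before the grad_output_tensor checks. A rough Python sketch of the resulting flow — the holder class and node/meta arguments are stand-ins, not Paddle APIs:

    from collections import defaultdict

    class GradTensorHolderSketch:
        # Stand-in for egr::GradTensorHolder: one accumulation buffer per (slot, rank).
        def __init__(self, input_meta):
            self.input_meta = input_meta
            self.buffers = defaultdict(float)

        def add(self, slot, rank, grad):
            self.buffers[(slot, rank)] += grad

    node_input_buffers = {}

    def accumulate_grad(next_node, input_meta, edge_rank, grad_output):
        # Construct the holder only once a gradient is actually about to be
        # added for this node (previously it was built before the checks).
        if next_node not in node_input_buffers:
            node_input_buffers[next_node] = GradTensorHolderSketch(input_meta)
        slot, rank = edge_rank
        node_input_buffers[next_node].add(slot, rank, grad_output)

    accumulate_grad("sigmoid_grad_node", input_meta=None, edge_rank=(0, 0), grad_output=1.0)
    print(node_input_buffers["sigmoid_grad_node"].buffers)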
2 changes: 1 addition & 1 deletion paddle/fluid/eager/custom_operator/custom_operator_node.h
@@ -59,7 +59,7 @@ class RunCustomOpNode : public GradNodeBase {
std::vector<egr::TensorWrapper>* fwd_var) {
std::vector<paddle::experimental::Tensor> res;
for (size_t i = 0; i < fwd_var->size(); i++) {
res.emplace_back(fwd_var->at(i).recover(nullptr));
res.emplace_back(fwd_var->at(i).recover());
}
return res;
}
8 changes: 7 additions & 1 deletion paddle/fluid/eager/grad_node_info.cc
@@ -61,6 +61,10 @@ void GradNodeBase::AddEdges(std::vector<AutogradMeta*>* metas, size_t slot_id) {
if (!node || !node.get()) {
meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>(meta));
}
VLOG(6) << "Add Edges for slot: " << slot_id << ", the Edge is from "
<< this->name() << " (addr: " << this << ") "
<< " to " << meta->GetMutableGradNode()->name()
<< " (addr: " << meta->GetMutableGradNode().get() << ")";

adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
@@ -84,7 +88,9 @@ void GradNodeBase::AddEdges(AutogradMeta* meta, size_t slot_id) {
meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>(meta));
}
VLOG(6) << "Add Edges for slot: " << slot_id << ", the Edge is from "
<< this->name() << " to " << meta->GetMutableGradNode()->name();
<< this->name() << " (addr: " << this << ") "
<< " to " << meta->GetMutableGradNode()->name()
<< " (addr: " << meta->GetMutableGradNode().get() << ")";

adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
1 change: 1 addition & 0 deletions paddle/fluid/eager/grad_tensor_holder.cc
@@ -110,6 +110,7 @@ void GradTensorHolder::add(size_t slot_id, size_t rank,
"got tensor: %s is empty please check you network "
"and make sure it creates grads.",
t.name()));

if (t.is_dense_tensor()) {
if (buffer_tensor.is_dense_tensor()) {
buffer_tensor = add_final_state_dygraph_function(t, buffer_tensor);
34 changes: 23 additions & 11 deletions paddle/fluid/eager/tensor_wrapper.h
@@ -77,16 +77,17 @@ class TensorWrapper {

intermidiate_tensor_.set_name(tensor.name() + "@Saved");

// If an output is marked "intermedaite", we won't create
// autograd_meta for it.
// In that case, simply skip OutRankInfo Copy
if (EagerUtils::nullable_autograd_meta(tensor)) {
out_rank_info_ = EagerUtils::OutRankInfo(tensor);
auto* tensor_autograd_meta = EagerUtils::nullable_autograd_meta(tensor);
if (tensor_autograd_meta) {
auto autograd_meta = std::make_shared<AutogradMeta>(
Edge(nullptr, EagerUtils::OutRankInfo(tensor)));
autograd_meta->SetStopGradient(tensor_autograd_meta->StopGradient());
intermidiate_tensor_.set_autograd_meta(autograd_meta);
weak_grad_node_ = tensor_autograd_meta->GetMutableGradNode();
}
}

paddle::experimental::Tensor recover(
const std::shared_ptr<GradNodeBase>& grad_node) {
paddle::experimental::Tensor recover() {
VLOG(6) << "Recover tensor: " << intermidiate_tensor_.name()
<< " for wrapper";
if (!intermidiate_tensor_.defined()) {
@@ -99,9 +100,20 @@
// if it's full_reserved just return the full copy of tensor
paddle::experimental::Tensor recovered_tensor = intermidiate_tensor_;
if (!full_reserved_) {
std::shared_ptr<GradNodeBase> new_grad_node = grad_node;
auto p_ab_autograd_meta =
std::make_shared<AutogradMeta>(Edge(new_grad_node, out_rank_info_));
std::shared_ptr<GradNodeBase> new_grad_node = weak_grad_node_.lock();
if (new_grad_node) {
VLOG(3) << "Recovered TensorWrapper with GradNode "
<< new_grad_node->name() << " addr: " << new_grad_node.get();
} else {
VLOG(3) << "Recovered TensorWrapper with Empth GradNode";
}
auto* intermediate_autograd_meta =
EagerUtils::unsafe_autograd_meta(intermidiate_tensor_);
auto p_ab_autograd_meta = std::make_shared<AutogradMeta>(
Edge(new_grad_node, intermediate_autograd_meta->OutRankInfo()));
p_ab_autograd_meta->SetStopGradient(
intermediate_autograd_meta->StopGradient());

recovered_tensor.set_autograd_meta(
std::static_pointer_cast<paddle::experimental::AbstractAutogradMeta>(
p_ab_autograd_meta));
@@ -149,8 +161,8 @@ class TensorWrapper {
private:
bool full_reserved_ = false;
bool no_need_buffer_ = false;
std::pair<size_t, size_t> out_rank_info_;
paddle::experimental::Tensor intermidiate_tensor_;
std::weak_ptr<egr::GradNodeBase> weak_grad_node_;
uint32_t inplace_version_snapshot_ = 0;
};
} // namespace egr
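The substantive change in tensor_wrapper.h above is ownership: instead of the caller passing a grad_node into recover(), the wrapper now captures a std::weak_ptr to the forward tensor's grad node at construction and locks it inside recover(), so a TensorWrapper no longer needs the node handed back to it and does not keep the node alive on its own. A rough Python analogy of that pattern — purely illustrative, not Paddle code:

    import weakref

    class GradNode:
        def __init__(self, name):
            self.name = name

    class TensorWrapperSketch:
        # Analogy of egr::TensorWrapper: hold only a weak reference to the
        # grad node (weak_grad_node_ in the C++ above), never a strong one.
        def __init__(self, value, grad_node=None):
            self.value = value
            self._weak_grad_node = weakref.ref(grad_node) if grad_node is not None else None

        def recover(self):
            # Counterpart of weak_grad_node_.lock(): yields the node if it is
            # still alive, otherwise None (an empty grad node).
            node = self._weak_grad_node() if self._weak_grad_node else None
            print("recovered with", node.name if node else "empty grad node")
            return self.value, node

    node = GradNode("sigmoid_grad")
    tw = TensorWrapperSketch([1.0, 2.0], grad_node=node)
    tw.recover()   # grad node still alive -> "sigmoid_grad"
    del node       # drop the last strong reference
    tw.recover()   # weak reference no longer resolves -> "empty grad node"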
@@ -41,7 +41,7 @@ TEST(TensorWrapper, Basic) {
et1.set_autograd_meta(auto_grad0);
et1.set_name("et1");
auto tw0 = egr::TensorWrapper(et1, true);
auto recover_et1 = tw0.recover(std::make_shared<eager_test::GradTestNode>());
auto recover_et1 = tw0.recover();
CHECK_EQ(recover_et1.name(), std::string("et1"));
CHECK_EQ(egr::EagerUtils::OutRankInfo(recover_et1).first,
egr::EagerUtils::OutRankInfo(et1).first);
@@ -67,7 +67,7 @@ TEST(TensorWrapper, Basic) {
auto auto_grad1 = std::make_shared<egr::AutogradMeta>(edge1);
et2.set_autograd_meta(auto_grad1);
auto tw1 = egr::TensorWrapper(et2, false);
auto recover_et2 = tw1.recover(grad_test_node1);
auto recover_et2 = tw1.recover();
CHECK_EQ(recover_et2.name(), std::string("et2@Saved"));
CHECK_EQ(egr::EagerUtils::OutRankInfo(recover_et2).first,
egr::EagerUtils::OutRankInfo(et2).first);
@@ -76,7 +76,5 @@
// Test Raw recover
paddle::experimental::Tensor et3;
auto tw2 = egr::TensorWrapper(et3, true);
CHECK(
tw2.recover(std::make_shared<eager_test::GradTestNode>()).initialized() ==
false);
CHECK(tw2.recover().initialized() == false);
}
9 changes: 4 additions & 5 deletions paddle/fluid/eager/utils.cc
@@ -360,16 +360,15 @@ void EagerUtils::Output2Result(
}

paddle::experimental::Tensor EagerUtils::RecoverTensorWrapper(
TensorWrapper* tw, const std::shared_ptr<GradNodeBase>& grad_node) {
return tw->recover(grad_node);
TensorWrapper* tw) {
return tw->recover();
}

std::vector<paddle::experimental::Tensor> EagerUtils::RecoverTensorWrapper(
std::vector<TensorWrapper>* tw,
const std::shared_ptr<GradNodeBase>& grad_node) {
std::vector<TensorWrapper>* tw) {
std::vector<paddle::experimental::Tensor> ret;
for (auto& t : *tw) {
ret.emplace_back(t.recover(grad_node));
ret.emplace_back(t.recover());
}
return ret;
}
6 changes: 2 additions & 4 deletions paddle/fluid/eager/utils.h
@@ -174,11 +174,9 @@ class EagerUtils {
const std::shared_ptr<EagerVariable>& view_output_var);

// TensorWrapper Utils
static paddle::experimental::Tensor RecoverTensorWrapper(
TensorWrapper* tw, const std::shared_ptr<GradNodeBase>& grad_node);
static paddle::experimental::Tensor RecoverTensorWrapper(TensorWrapper* tw);
static std::vector<paddle::experimental::Tensor> RecoverTensorWrapper(
std::vector<TensorWrapper>* tw,
const std::shared_ptr<GradNodeBase>& grad_node);
std::vector<TensorWrapper>* tw);

// Intermidate needed remove this once we don't need legacy
// Inner Method
@@ -209,7 +209,9 @@ def func_example_with_gradient_and_create_graph(self):
self.assertTrue(np.allclose(dddx_grad_actual, dddx_expected))

def test_all_cases(self):
if _in_legacy_dygraph():
self.func_exception()
self.func_example_with_gradient_and_create_graph()
with _test_eager_guard():
self.func_exception()
self.func_example_with_gradient_and_create_graph()

@@ -296,7 +298,8 @@ def func_example_with_gradient_and_create_graph(self):
self.assertTrue(np.allclose(dddx_grad_actual, dddx_expected))

def test_all_cases(self):
if _in_legacy_dygraph():
self.func_example_with_gradient_and_create_graph()
with _test_eager_guard():
self.func_example_with_gradient_and_create_graph()


2 changes: 1 addition & 1 deletion python/paddle/utils/code_gen/backward.yaml
@@ -1411,7 +1411,7 @@
func : GeneralTernaryGradInferMeta
param : [out, fwd_grad_out, grad_grad_x]
kernel :
func : sigmoid_double_grad
func : sigmoid_triple_grad

- backward_api : silu_grad
forward : silu (Tensor x) -> Tensor(out)

1 comment on commit 935e717

@paddle-bot-old
Congratulations! Your pull request passed all required CI. You could ask reviewer(s) to approve and merge. 🎉
