[Cherry-pick] Update layoutautotune for inplace (#45826) (#46226)
Browse files Browse the repository at this point in the history
cherry-pick from #45826
LayoutAutotune now supports inplace OPs.
Adjusted UseAutotune according to the review feedback on Add eager layout autotune #45409.
Moved the LayoutAutoTune check into the Controller, keeping it consistent with the AMP check.
AnnaTrainingG authored Sep 20, 2022
1 parent 7712ce1 commit c0324e8
Showing 14 changed files with 443 additions and 326 deletions.
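Note on the core change (illustrative, not part of the diff): both AMP and layout autotune are now queried through egr::Controller, so generated forward functions branch uniformly. A minimal sketch of the resulting dispatch pattern, assuming only the methods visible in the diffs below:

```cpp
// Sketch only — the generated code in the hunks below is the authoritative form.
// GetAMPLevel() and UseLayoutAutoTune() both live on egr::Controller now.
if (egr::Controller::Instance().GetAMPLevel() !=
    paddle::imperative::AmpLevel::O0) {
  // AMP path: cast inputs via EagerAmpAutoCast, re-dispatch under O0.
}
if (egr::Controller::Instance().UseLayoutAutoTune()) {
  // Layout path: transform inputs, re-dispatch with autotune disabled.
}
```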
@@ -51,17 +51,17 @@ paddle::experimental::Tensor conv2d_ad_func(
 
   auto amp_dst_dtype = egr::GetAmpDestDtype(op_name, amp_tensors_vector);
 
-  auto NEW_input =
+  auto new_input =
       egr::EagerAmpAutoCast("input", input, amp_dst_dtype, op_name);
-  auto NEW_filter =
+  auto new_filter =
       egr::EagerAmpAutoCast("filter", filter, amp_dst_dtype, op_name);
 
   {
     paddle::imperative::AutoCastGuard guard(
         egr::Controller::Instance().GetCurrentTracer(),
         paddle::imperative::AmpLevel::O0);
-    return conv2d_ad_func(NEW_input,
-                          NEW_filter,
+    return conv2d_ad_func(new_input,
+                          new_filter,
                           strides,
                           paddings,
                           paddding_algorithm,
@@ -76,7 +76,7 @@ paddle::experimental::Tensor conv2d_ad_func(
 
   // Layout autotune
 
-  if (paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune()) {
+  if (egr::Controller::Instance().UseLayoutAutoTune()) {
     VLOG(5) << "Check and Prepare For LAYOUT";
     paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                          egr::kSlotSmallVectorSize>
@@ -85,11 +85,10 @@ paddle::experimental::Tensor conv2d_ad_func(
     auto op_name = phi::TransToFluidOpName("conv2d");
     auto transformer = egr::EagerLayoutAutotune<std::string>(
         op_name, tensors_vector, &data_format);
-    auto NEW_input = transformer->TransInTensor("input", input);
-    bool is_enable_tune =
-        paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune();
-    paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
-    auto out = conv2d_ad_func(NEW_input,
+    auto new_input = transformer->TransInTensor("input", input);
+    bool need_tune = egr::Controller::Instance().UseLayoutAutoTune();
+    egr::Controller::Instance().DisableLayoutAutoTune();
+    auto out = conv2d_ad_func(new_input,
                               filter,
                               strides,
                               paddings,
@@ -101,8 +100,8 @@ paddle::experimental::Tensor conv2d_ad_func(
                               workspace_size_MB,
                               exhaustive_search);
     transformer->SetOutTensorLayout(&out);
-    if (is_enable_tune) {
-      paddle::imperative::LayoutAutoTune::Instance().EnableLayoutAutoTune();
+    if (need_tune) {
+      egr::Controller::Instance().EnableLayoutAutoTune();
     }
     // Returns
     return out;
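The need_tune / Disable / Enable sequence above saves the autotune flag, switches it off around the nested conv2d_ad_func call (so the inner dispatch does not transform the already-transposed input a second time), and conditionally restores it. A hedged sketch of the equivalent RAII form, assuming paddle::imperative::LayoutAutotuneGuard saves the tracer's flag on construction and restores it on destruction (as the generator template later in this commit uses):

```cpp
{
  // Second argument is the flag value held for the scope of the guard.
  paddle::imperative::LayoutAutotuneGuard guard(
      egr::Controller::Instance().GetCurrentTracer(), false);
  auto out = conv2d_ad_func(new_input, filter, strides, paddings,
                            /* ...remaining attributes... */ exhaustive_search);
  transformer->SetOutTensorLayout(&out);
  return out;
}  // previous autotune setting restored here
```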
17 changes: 17 additions & 0 deletions paddle/fluid/eager/api/utils/global_utils.h
@@ -55,6 +55,23 @@ class Controller {
   paddle::imperative::AmpLevel GetAMPLevel() const {
     return tracer_->GetAmpLevel();
   }
+
+  bool UseLayoutAutoTune() {
+    bool use_autotune = false;
+#if defined(PADDLE_WITH_CUDA)
+    auto place = tracer_->ExpectedPlace();
+    bool is_gpu_place = paddle::platform::is_gpu_place(place);
+    if (is_gpu_place) {
+      use_autotune = tracer_->UseLayoutAutoTune();
+    }
+#endif
+    return use_autotune;
+  }
+
+  void DisableLayoutAutoTune() { tracer_->DisableLayoutAutoTune(); }
+
+  void EnableLayoutAutoTune() { tracer_->EnableLayoutAutoTune(); }
+
   bool HasGrad() const { return tracer_->HasGrad(); }
   void SetHasGrad(bool has_grad) { tracer_->SetHasGrad(has_grad); }
   std::string GenerateUniqueName(std::string key = "eager_in_tmp") {
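Callers use these three methods the same way the conv2d change above does; an illustrative sketch (not from the commit):

```cpp
// Query, disable, and conditionally re-enable layout autotune through the
// Controller instead of the paddle::imperative::LayoutAutoTune singleton.
auto& ctrl = egr::Controller::Instance();
bool need_tune = ctrl.UseLayoutAutoTune();  // always false off-GPU or without CUDA
ctrl.DisableLayoutAutoTune();
// ... dispatch inner ops that must see the inputs' current layouts ...
if (need_tune) {
  ctrl.EnableLayoutAutoTune();
}
```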
198 changes: 112 additions & 86 deletions paddle/fluid/eager/auto_code_generator/generator/eager_gen.py
@@ -421,15 +421,14 @@ class {} : public egr::GradNodeBase {{
 """
 LAYOUT_LOGIC_TEMPLATE=\
 """
-  if (paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune()) {{
-    VLOG(5) << "Check and Prepare For LAYOUT";
+  if (egr::Controller::Instance().UseLayoutAutoTune()) {{
     paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> tensors_vector = {};
     {}
     {}
-    paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
+    VLOG(5) << "Check and Prepare For LAYOUT "<< op_name;
+    paddle::imperative::LayoutAutotuneGuard guard(egr::Controller::Instance().GetCurrentTracer(), false);
     {}
     {}
-    paddle::imperative::LayoutAutoTune::Instance().EnableLayoutAutoTune();
     // Returns
     return {};
   }}
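Filled in for a hypothetical single-input op with one data_format attribute, the new template expands to roughly the following generated C++ (a sketch; some_op, x, and some_op_ad_func are placeholders):

```cpp
if (egr::Controller::Instance().UseLayoutAutoTune()) {
  paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                       egr::kSlotSmallVectorSize>
      tensors_vector = {{x}};
  auto op_name = phi::TransToFluidOpName("some_op");
  auto transformer = egr::EagerLayoutAutotune<std::string>(
      op_name, tensors_vector, &data_format);
  auto new_x = transformer->TransInTensor("x", x);
  VLOG(5) << "Check and Prepare For LAYOUT " << op_name;
  // The RAII guard replaces the explicit Disable/Enable pair of the old template.
  paddle::imperative::LayoutAutotuneGuard guard(
      egr::Controller::Instance().GetCurrentTracer(), false);
  paddle::experimental::Tensor out = some_op_ad_func(new_x, data_format);
  transformer->SetOutTensorLayout(&out);
  // Returns
  return out;
}
```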
@@ -906,6 +905,7 @@ def GenerateNodeCreationCodes(self, for_backward=False):
 
         set_grad_in_meta = f"{indent}grad_node->SetGradInMeta({name}, {pos});"
         set_retain_grad = f"{indent}egr::EagerUtils::CheckAndRetainGrad({name});"
+
         set_out_rank_list.append(set_out_rank)
         set_history_list.append(set_history)
         set_grad_in_meta_list.append(set_grad_in_meta)
@@ -998,6 +998,98 @@ def __init__(self, forward_api_contents, grad_api_contents,
         self.forward_definition_str = ""
         self.forward_declaration_str = ""
 
+    def GenerateForwardLayoutAutotune(self, forward_api_name,
+                                      amp_tensors_vector_list,
+                                      layout_tensors_vector_optional_list,
+                                      layout_autotune_list_str,
+                                      returns_type_str, returns_str,
+                                      amp_inputs_call_args_str):
+        intermediate_outputs = self.intermediate_outputs
+        forward_attrs_list = self.forward_attrs_list
+        forward_outputs_position_map = self.forward_outputs_position_map
+        num_outputs = len(
+            forward_outputs_position_map.keys()) - len(intermediate_outputs)
+        # for layout autotune attr
+        lightly_sensitive_attr = [
+            'axis', 'axes', 'dim', 'dims', 'start', 'end', 'stop'
+        ]
+        heavily_sensitive_attr = ['data_format', 'data_layout']
+        layout_autotune_attr = []
+        layout_autotune_attr_code_list = []
+        layout_autotune_attr_type_list = []
+        layout_autotune_attr_code_list.append(
+            f"auto op_name = phi::TransToFluidOpName(\"{forward_api_name}\");\n"
+        )
+
+        lightly_flag = False
+        heavily_flag = False
+        for name, atype, default_val, pos in forward_attrs_list:
+            for attr_name in lightly_sensitive_attr:
+                if name.find(attr_name) != -1 and (name
+                                                   not in layout_autotune_attr):
+                    lightly_flag = True
+                    layout_autotune_attr.append(name)
+                    layout_autotune_attr_type_list.append(atype)
+            if lightly_flag is False:
+                for attr_name in heavily_sensitive_attr:
+                    if name.find(attr_name) != -1 and (
+                            name not in layout_autotune_attr):
+                        layout_autotune_attr.append(name)
+                        layout_autotune_attr_type_list.append(atype)
+                        heavily_flag = True
+        if len(layout_autotune_attr) == 0:
+            layout_autotune_attr_code_list.append(
+                f"auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector);\n"
+            )
+        elif len(layout_autotune_attr) == 1:
+            layout_autotune_attr_code_list.append(
+                f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}>(op_name, tensors_vector, &{layout_autotune_attr[0]});\n"
+            )
+        elif len(layout_autotune_attr) == 2:
+            layout_autotune_attr_code_list.append(
+                f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}, {layout_autotune_attr_type_list[1]}>(op_name, tensors_vector, &{layout_autotune_attr[0]}, &{layout_autotune_attr[1]});\n"
+            )
+        else:
+            layout_autotune_attr_code_list.append(
+                f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}>(op_name, tensors_vector,&{layout_autotune_attr[0]});\n"
+            )
+        # Out tensor
+        layout_inputs_call_args_str = amp_inputs_call_args_str
+        forward_function_name = GetDygraphForwardFunctionName(forward_api_name)
+        layout_tmp_result_list = []
+        layout_autotune_outs_list = []
+        result_name = "api_result"
+        if num_outputs == 1:
+            result_name = returns_str
+            layout_autotune_outs_list.append(
+                f"transformer -> SetOutTensorLayout(&{returns_str});\n")
+        else:
+            for name, (rtype, pos) in forward_outputs_position_map.items():
+                if name in intermediate_outputs:
+                    continue
+                layout_autotune_outs_list.append(
+                    f" auto& {name} = std::get<{len(layout_tmp_result_list)}>(api_result);\n"
+                )
+                layout_autotune_outs_list.append(
+                    f" transformer -> SetOutTensorLayout(&{name});\n")
+                layout_tmp_result_list.append(f"{name}")
+
+        tensors_vector_list_str = "{ " + ",".join(
+            amp_tensors_vector_list) + " }"
+
+        if len(amp_tensors_vector_list) == 0:
+            layout_logic_str = ""
+        else:
+            after_call_str = f"{returns_type_str} {result_name} = {forward_function_name}({layout_inputs_call_args_str});\n"
+            layout_logic_str = LAYOUT_LOGIC_TEMPLATE.format(
+                tensors_vector_list_str,
+                " ".join(layout_tensors_vector_optional_list),
+                " ".join(layout_autotune_attr_code_list) + " " +
+                layout_autotune_list_str, after_call_str,
+                " ".join(layout_autotune_outs_list), returns_str)
+
+        return layout_logic_str
+
     def GenerateForwardDefinitionAndDeclaration(self, is_inplaced):
         namespace = self.namespace
         if self.forward_api_name[-1] == '_' and not is_inplaced:
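For reference, the attribute-count dispatch in the helper above emits one of three transformer constructions; sketched as the C++ it generates (op and attribute names hypothetical, and the three forms are alternatives, not a single scope):

```cpp
// Zero layout-sensitive attrs: untemplated construction.
auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector);

// One sensitive attr, e.g. a data_format string.
auto transformer = egr::EagerLayoutAutotune<std::string>(
    op_name, tensors_vector, &data_format);

// Two sensitive attrs, e.g. an axis plus a boolean flag.
auto transformer = egr::EagerLayoutAutotune<int, bool>(
    op_name, tensors_vector, &axis, &keep_dim);
```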
@@ -1033,7 +1125,7 @@ def GenerateForwardDefinitionAndDeclaration(self, is_inplaced):
         layout_tensors_vector_optional_list = []
         for name, (ttype, pos) in forward_inputs_position_map.items():
             inputs_call_list[pos] = f"{name}"
-            amp_inputs_call_list[pos] = f"NEW_{name}"
+            amp_inputs_call_list[pos] = f"new_{name}"
             is_optional = (name in optional_inputs)
             if IsPlainTensorType(ttype):
                 if is_optional:
@@ -1046,30 +1138,30 @@ def GenerateForwardDefinitionAndDeclaration(self, is_inplaced):
                         f"if ({name}) amp_tensors_vector.push_back({{ *{name} }});\n"
                     )
                     amp_autocast_optional_list.append(
-                        f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                        f"auto new_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                     )
                     layout_tensors_vector_optional_list.append(
                         f"if ({name}) tensors_vector.push_back({{ *{name} }});\n"
                     )
                     layout_autotune_optional_list.append(
-                        f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+                        f"auto new_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
                     )
                 else:
                     if is_inplaced and forward_inplace_map and name in forward_inplace_map.keys(
                     ):
                         arg_str = f"paddle::experimental::Tensor& {name}"
                         amp_tensors_vector_list.append(f"{{{name}}}")
                         amp_autocast_list.append(
-                            f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                            f"auto new_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                         )
                     else:
                         arg_str = f"const paddle::experimental::Tensor& {name}"
                         amp_tensors_vector_list.append(f"{{{name}}}")
                         amp_autocast_list.append(
-                            f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                            f"auto new_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                         )
                     layout_autotune_list.append(
-                        f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+                        f"auto new_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
                     )
             else:
                 assert IsVectorTensorType(ttype)
@@ -1083,10 +1175,10 @@ def GenerateForwardDefinitionAndDeclaration(self, is_inplaced):
                         f"if ({name}) amp_tensors_vector.push_back( *{name} );\n"
                     )
                     amp_autocast_optional_list.append(
-                        f"auto NEW_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                        f"auto new_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                     )
                     layout_autotune_optional_list.append(
-                        f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+                        f"auto new_{name} = transformer->TransInTensors(\"{name}\", {name});\n"
                     )
                 else:
                     if is_inplaced and forward_inplace_map and name in forward_inplace_map.keys(
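The TransInTensor → TransInTensors rename in these two hunks matters because vector inputs go through a different transformer entry point than plain tensors; a sketch of the distinction in the generated code (variable names hypothetical):

```cpp
// Plain paddle::experimental::Tensor input:
auto new_x = transformer->TransInTensor("x", x);

// std::vector<paddle::experimental::Tensor> input uses the plural overload:
auto new_xs = transformer->TransInTensors("xs", xs);
```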
@@ -1096,60 +1188,15 @@ def GenerateForwardDefinitionAndDeclaration(self, is_inplaced):
                         arg_str = f"const std::vector<paddle::experimental::Tensor>& {name}"
                         amp_tensors_vector_list.append(f"{name}")
                         amp_autocast_list.append(
-                            f"auto NEW_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                            f"auto new_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                         )
                         layout_autotune_list.append(
-                            f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+                            f"auto new_{name} = transformer->TransInTensors(\"{name}\", {name});\n"
                         )
 
             inputs_args_definition_list[pos] = arg_str
             inputs_args_declaration_list[pos] = arg_str
 
-        # for layout autotune attr
-        lightly_sensitive_attr = [
-            'axis', 'axes', 'dim', 'dims', 'start', 'end', 'stop'
-        ]
-        heavily_sensitive_attr = ['data_format', 'data_layout']
-        layout_autotune_attr = []
-        layout_autotune_attr_code_list = []
-        layout_autotune_attr_type_list = []
-        layout_autotune_attr_code_list.append(
-            f"auto op_name = phi::TransToFluidOpName(\"{forward_api_name}\");\n"
-        )
-
-        lightly_flag = False
-        heavily_flag = False
-        for name, atype, default_val, pos in forward_attrs_list:
-            for attr_name in lightly_sensitive_attr:
-                if name.find(
-                        attr_name) != -1 and name not in layout_autotune_attr:
-                    lightly_flag = True
-                    layout_autotune_attr.append(name)
-                    layout_autotune_attr_type_list.append(atype)
-            if lightly_flag is False:
-                for attr_name in heavily_sensitive_attr:
-                    if name.find(attr_name
-                                 ) != -1 and name not in layout_autotune_attr:
-                        layout_autotune_attr.append(name)
-                        layout_autotune_attr_type_list.append(atype)
-                        heavily_flag = True
-        if len(layout_autotune_attr) == 0:
-            layout_autotune_attr_code_list.append(
-                f"auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector);\n"
-            )
-        elif len(layout_autotune_attr) == 1:
-            layout_autotune_attr_code_list.append(
-                f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}>(op_name, tensors_vector, &{layout_autotune_attr[0]});\n"
-            )
-        elif len(layout_autotune_attr) == 2:
-            layout_autotune_attr_code_list.append(
-                f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}, {layout_autotune_attr_type_list[1]}>(op_name, tensors_vector, &{layout_autotune_attr[0]}, &{layout_autotune_attr[1]});\n"
-            )
-        else:
-            layout_autotune_attr_code_list.append(
-                f"auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector, {len(layout_autotune_attr)});\n"
-            )
-
         # forward attrs
         for name, atype, default_val, pos in forward_attrs_list:
             inputs_call_list[pos] = name
@@ -1339,33 +1386,12 @@ def GenerateForwardDefinitionAndDeclaration(self, is_inplaced):
             amp_autocast_list_str, amp_call_str)
 
         # Forward layout autotune
-        layout_inputs_call_args_str = amp_inputs_call_args_str
-        layout_tmp_result_list = []
-        layout_autotune_outs_list = ""
-        if num_outputs == 1:
-            layout_autotune_outs_list += f"{indent}auto {returns_str} = api_result;\n"
-            layout_autotune_outs_list += f"{indent}transformer -> SetOutTensorLayout(&{returns_str});\n"
-        else:
-            for name, (rtype, pos) in forward_outputs_position_map.items():
-                if name in intermediate_outputs:
-                    continue
-                layout_autotune_outs_list += f"{indent}auto& {name} = std::get<{len(layout_tmp_result_list)}>(api_result);\n"
-                layout_autotune_outs_list += f"{indent}transformer -> SetOutTensorLayout(&{name});\n"
-                layout_tmp_result_list.append(f"{name}")
-
-        if returns_type_str == "paddle::experimental::Tensor&" or forward_api_name == "slice" or forward_api_name == "strided_slice" or len(
-                layout_autotune_attr) == 0:
-            layout_logic_str = ""
-        else:
-            # after_call_str = f"return {forward_ad_function_name}({layout_inputs_call_args_str});\n"
-            after_call_str = f"auto api_result = {forward_ad_function_name}({layout_inputs_call_args_str});\n"
-            layout_logic_str = LAYOUT_LOGIC_TEMPLATE.format(
-                amp_tensors_vector_list_str,
-                " ".join(layout_tensors_vector_optional_list),
-                " ".join(layout_autotune_attr_code_list) + " " +
-                " ".join(layout_autotune_list) +
-                " ".join(layout_autotune_optional_list), after_call_str,
-                layout_autotune_outs_list, returns_str)
+        layout_autotune_list_str = " ".join(
+            layout_autotune_list) + " ".join(layout_autotune_optional_list)
+        layout_logic_str = self.GenerateForwardLayoutAutotune(
+            forward_api_name, amp_tensors_vector_list,
+            layout_tensors_vector_optional_list, layout_autotune_list_str,
+            returns_type_str, returns_str, amp_inputs_call_args_str)
 
         # For inputs outputs prepare for logging
         var_str = f"\n{indent} std::string input_str = \"\";"