[Cherry-pick] Update layoutautotune for inplace (#45826) (#46226)
Browse files Browse the repository at this point in the history
cherry-pick from #45826
LayoutAutotune now supports inplace OPs.
Adjusted UseAutotune according to the review feedback on Add eager layout autotune #45409.
Moved the LayoutAutoTune check into the Controller, keeping it consistent with the AMP check.
AnnaTrainingG authored Sep 20, 2022
1 parent 7712ce1 commit c0324e8
Showing 14 changed files with 443 additions and 326 deletions.
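Note on the core change (illustrative, not part of the diff): both AMP and layout autotune are now queried through egr::Controller, so generated forward functions branch uniformly. A minimal sketch of the resulting dispatch pattern, assuming only the methods visible in the diffs below:

```cpp
// Sketch only — the generated code in the hunks below is the authoritative form.
// GetAMPLevel() and UseLayoutAutoTune() both live on egr::Controller now.
if (egr::Controller::Instance().GetAMPLevel() !=
    paddle::imperative::AmpLevel::O0) {
  // AMP path: cast inputs via EagerAmpAutoCast, re-dispatch under O0.
}
if (egr::Controller::Instance().UseLayoutAutoTune()) {
  // Layout path: transform inputs, re-dispatch with autotune disabled.
}
```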
@@ -51,17 +51,17 @@ paddle::experimental::Tensor conv2d_ad_func(
 
   auto amp_dst_dtype = egr::GetAmpDestDtype(op_name, amp_tensors_vector);
 
-  auto NEW_input =
+  auto new_input =
       egr::EagerAmpAutoCast("input", input, amp_dst_dtype, op_name);
-  auto NEW_filter =
+  auto new_filter =
       egr::EagerAmpAutoCast("filter", filter, amp_dst_dtype, op_name);
 
   {
     paddle::imperative::AutoCastGuard guard(
         egr::Controller::Instance().GetCurrentTracer(),
         paddle::imperative::AmpLevel::O0);
-    return conv2d_ad_func(NEW_input,
-                          NEW_filter,
+    return conv2d_ad_func(new_input,
+                          new_filter,
                           strides,
                           paddings,
                           paddding_algorithm,
@@ -76,7 +76,7 @@ paddle::experimental::Tensor conv2d_ad_func(
 
   // Layout autotune
 
-  if (paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune()) {
+  if (egr::Controller::Instance().UseLayoutAutoTune()) {
     VLOG(5) << "Check and Prepare For LAYOUT";
     paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                          egr::kSlotSmallVectorSize>
@@ -85,11 +85,10 @@ paddle::experimental::Tensor conv2d_ad_func(
     auto op_name = phi::TransToFluidOpName("conv2d");
     auto transformer = egr::EagerLayoutAutotune<std::string>(
         op_name, tensors_vector, &data_format);
-    auto NEW_input = transformer->TransInTensor("input", input);
-    bool is_enable_tune =
-        paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune();
-    paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
-    auto out = conv2d_ad_func(NEW_input,
+    auto new_input = transformer->TransInTensor("input", input);
+    bool need_tune = egr::Controller::Instance().UseLayoutAutoTune();
+    egr::Controller::Instance().DisableLayoutAutoTune();
+    auto out = conv2d_ad_func(new_input,
                               filter,
                               strides,
                               paddings,
@@ -101,8 +100,8 @@ paddle::experimental::Tensor conv2d_ad_func(
                               workspace_size_MB,
                               exhaustive_search);
     transformer->SetOutTensorLayout(&out);
-    if (is_enable_tune) {
-      paddle::imperative::LayoutAutoTune::Instance().EnableLayoutAutoTune();
+    if (need_tune) {
+      egr::Controller::Instance().EnableLayoutAutoTune();
     }
     // Returns
     return out;
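The need_tune / Disable / Enable sequence above saves the autotune flag, switches it off around the nested conv2d_ad_func call (so the inner dispatch does not transform the already-transposed input a second time), and conditionally restores it. A hedged sketch of the equivalent RAII form, assuming paddle::imperative::LayoutAutotuneGuard saves the tracer's flag on construction and restores it on destruction (as the generator template later in this commit uses):

```cpp
{
  // Second argument is the flag value held for the scope of the guard.
  paddle::imperative::LayoutAutotuneGuard guard(
      egr::Controller::Instance().GetCurrentTracer(), false);
  auto out = conv2d_ad_func(new_input, filter, strides, paddings,
                            /* ...remaining attributes... */ exhaustive_search);
  transformer->SetOutTensorLayout(&out);
  return out;
}  // previous autotune setting restored here
```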
17 changes: 17 additions & 0 deletions paddle/fluid/eager/api/utils/global_utils.h
@@ -55,6 +55,23 @@ class Controller {
   paddle::imperative::AmpLevel GetAMPLevel() const {
     return tracer_->GetAmpLevel();
   }
+
+  bool UseLayoutAutoTune() {
+    bool use_autotune = false;
+#if defined(PADDLE_WITH_CUDA)
+    auto place = tracer_->ExpectedPlace();
+    bool is_gpu_place = paddle::platform::is_gpu_place(place);
+    if (is_gpu_place) {
+      use_autotune = tracer_->UseLayoutAutoTune();
+    }
+#endif
+    return use_autotune;
+  }
+
+  void DisableLayoutAutoTune() { tracer_->DisableLayoutAutoTune(); }
+
+  void EnableLayoutAutoTune() { tracer_->EnableLayoutAutoTune(); }
+
   bool HasGrad() const { return tracer_->HasGrad(); }
   void SetHasGrad(bool has_grad) { tracer_->SetHasGrad(has_grad); }
   std::string GenerateUniqueName(std::string key = "eager_in_tmp") {
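Callers use these three methods the same way the conv2d change above does; an illustrative sketch (not from the commit):

```cpp
// Query, disable, and conditionally re-enable layout autotune through the
// Controller instead of the paddle::imperative::LayoutAutoTune singleton.
auto& ctrl = egr::Controller::Instance();
bool need_tune = ctrl.UseLayoutAutoTune();  // always false off-GPU or without CUDA
ctrl.DisableLayoutAutoTune();
// ... dispatch inner ops that must see the inputs' current layouts ...
if (need_tune) {
  ctrl.EnableLayoutAutoTune();
}
```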
198 changes: 112 additions & 86 deletions paddle/fluid/eager/auto_code_generator/generator/eager_gen.py
@@ -421,15 +421,14 @@ class {} : public egr::GradNodeBase {{
 """
 LAYOUT_LOGIC_TEMPLATE=\
 """
-  if (paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune()) {{
-    VLOG(5) << "Check and Prepare For LAYOUT";
+  if (egr::Controller::Instance().UseLayoutAutoTune()) {{
     paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> tensors_vector = {};
     {}
     {}
-    paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
+    VLOG(5) << "Check and Prepare For LAYOUT "<< op_name;
+    paddle::imperative::LayoutAutotuneGuard guard(egr::Controller::Instance().GetCurrentTracer(), false);
     {}
     {}
-    paddle::imperative::LayoutAutoTune::Instance().EnableLayoutAutoTune();
     // Returns
     return {};
   }}
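Filled in for a hypothetical single-input op with one data_format attribute, the new template expands to roughly the following generated C++ (a sketch; some_op, x, and some_op_ad_func are placeholders):

```cpp
if (egr::Controller::Instance().UseLayoutAutoTune()) {
  paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                       egr::kSlotSmallVectorSize>
      tensors_vector = {{x}};
  auto op_name = phi::TransToFluidOpName("some_op");
  auto transformer = egr::EagerLayoutAutotune<std::string>(
      op_name, tensors_vector, &data_format);
  auto new_x = transformer->TransInTensor("x", x);
  VLOG(5) << "Check and Prepare For LAYOUT " << op_name;
  // The RAII guard replaces the explicit Disable/Enable pair of the old template.
  paddle::imperative::LayoutAutotuneGuard guard(
      egr::Controller::Instance().GetCurrentTracer(), false);
  paddle::experimental::Tensor out = some_op_ad_func(new_x, data_format);
  transformer->SetOutTensorLayout(&out);
  // Returns
  return out;
}
```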
@@ -906,6 +905,7 @@ def GenerateNodeCreationCodes(self, for_backward=False):
 
         set_grad_in_meta = f"{indent}grad_node->SetGradInMeta({name}, {pos});"
         set_retain_grad = f"{indent}egr::EagerUtils::CheckAndRetainGrad({name});"
+
         set_out_rank_list.append(set_out_rank)
         set_history_list.append(set_history)
         set_grad_in_meta_list.append(set_grad_in_meta)
@@ -998,6 +998,98 @@ def __init__(self, forward_api_contents, grad_api_contents,
         self.forward_definition_str = ""
         self.forward_declaration_str = ""
 
+    def GenerateForwardLayoutAutotune(self, forward_api_name,
+                                      amp_tensors_vector_list,
+                                      layout_tensors_vector_optional_list,
+                                      layout_autotune_list_str,
+                                      returns_type_str, returns_str,
+                                      amp_inputs_call_args_str):
+        intermediate_outputs = self.intermediate_outputs
+        forward_attrs_list = self.forward_attrs_list
+        forward_outputs_position_map = self.forward_outputs_position_map
+        num_outputs = len(
+            forward_outputs_position_map.keys()) - len(intermediate_outputs)
+        # for layout autotune attr
+        lightly_sensitive_attr = [
+            'axis', 'axes', 'dim', 'dims', 'start', 'end', 'stop'
+        ]
+        heavily_sensitive_attr = ['data_format', 'data_layout']
+        layout_autotune_attr = []
+        layout_autotune_attr_code_list = []
+        layout_autotune_attr_type_list = []
+        layout_autotune_attr_code_list.append(
+            f"auto op_name = phi::TransToFluidOpName(\"{forward_api_name}\");\n"
+        )
+
+        lightly_flag = False
+        heavily_flag = False
+        for name, atype, default_val, pos in forward_attrs_list:
+            for attr_name in lightly_sensitive_attr:
+                if name.find(attr_name) != -1 and (name
+                                                   not in layout_autotune_attr):
+                    lightly_flag = True
+                    layout_autotune_attr.append(name)
+                    layout_autotune_attr_type_list.append(atype)
+            if lightly_flag is False:
+                for attr_name in heavily_sensitive_attr:
+                    if name.find(attr_name) != -1 and (
+                            name not in layout_autotune_attr):
+                        layout_autotune_attr.append(name)
+                        layout_autotune_attr_type_list.append(atype)
+                        heavily_flag = True
+        if len(layout_autotune_attr) == 0:
+            layout_autotune_attr_code_list.append(
+                f"auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector);\n"
+            )
+        elif len(layout_autotune_attr) == 1:
+            layout_autotune_attr_code_list.append(
+                f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}>(op_name, tensors_vector, &{layout_autotune_attr[0]});\n"
+            )
+        elif len(layout_autotune_attr) == 2:
+            layout_autotune_attr_code_list.append(
+                f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}, {layout_autotune_attr_type_list[1]}>(op_name, tensors_vector, &{layout_autotune_attr[0]}, &{layout_autotune_attr[1]});\n"
+            )
+        else:
+            layout_autotune_attr_code_list.append(
+                f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}>(op_name, tensors_vector,&{layout_autotune_attr[0]});\n"
+            )
+        # Out tensor
+        layout_inputs_call_args_str = amp_inputs_call_args_str
+        forward_function_name = GetDygraphForwardFunctionName(forward_api_name)
+        layout_tmp_result_list = []
+        layout_autotune_outs_list = []
+        result_name = "api_result"
+        if num_outputs == 1:
+            result_name = returns_str
+            layout_autotune_outs_list.append(
+                f"transformer -> SetOutTensorLayout(&{returns_str});\n")
+        else:
+            for name, (rtype, pos) in forward_outputs_position_map.items():
+                if name in intermediate_outputs:
+                    continue
+                layout_autotune_outs_list.append(
+                    f" auto& {name} = std::get<{len(layout_tmp_result_list)}>(api_result);\n"
+                )
+                layout_autotune_outs_list.append(
+                    f" transformer -> SetOutTensorLayout(&{name});\n")
+                layout_tmp_result_list.append(f"{name}")
+
+        tensors_vector_list_str = "{ " + ",".join(
+            amp_tensors_vector_list) + " }"
+
+        if len(amp_tensors_vector_list) == 0:
+            layout_logic_str = ""
+        else:
+            after_call_str = f"{returns_type_str} {result_name} = {forward_function_name}({layout_inputs_call_args_str});\n"
+            layout_logic_str = LAYOUT_LOGIC_TEMPLATE.format(
+                tensors_vector_list_str,
+                " ".join(layout_tensors_vector_optional_list),
+                " ".join(layout_autotune_attr_code_list) + " " +
+                layout_autotune_list_str, after_call_str,
+                " ".join(layout_autotune_outs_list), returns_str)
+
+        return layout_logic_str
+
     def GenerateForwardDefinitionAndDeclaration(self, is_inplaced):
         namespace = self.namespace
         if self.forward_api_name[-1] == '_' and not is_inplaced:
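For reference, the attribute-count dispatch in the helper above emits one of three transformer constructions; sketched as the C++ it generates (op and attribute names hypothetical, and the three forms are alternatives, not a single scope):

```cpp
// Zero layout-sensitive attrs: untemplated construction.
auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector);

// One sensitive attr, e.g. a data_format string.
auto transformer = egr::EagerLayoutAutotune<std::string>(
    op_name, tensors_vector, &data_format);

// Two sensitive attrs, e.g. an axis plus a boolean flag.
auto transformer = egr::EagerLayoutAutotune<int, bool>(
    op_name, tensors_vector, &axis, &keep_dim);
```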
@@ -1033,7 +1125,7 @@ def GenerateForwardDefinitionAndDeclaration(self, is_inplaced):
         layout_tensors_vector_optional_list = []
         for name, (ttype, pos) in forward_inputs_position_map.items():
             inputs_call_list[pos] = f"{name}"
-            amp_inputs_call_list[pos] = f"NEW_{name}"
+            amp_inputs_call_list[pos] = f"new_{name}"
             is_optional = (name in optional_inputs)
             if IsPlainTensorType(ttype):
                 if is_optional:
@@ -1046,30 +1138,30 @@ def GenerateForwardDefinitionAndDeclaration(self, is_inplaced):
                         f"if ({name}) amp_tensors_vector.push_back({{ *{name} }});\n"
                     )
                     amp_autocast_optional_list.append(
-                        f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                        f"auto new_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                     )
                     layout_tensors_vector_optional_list.append(
                         f"if ({name}) tensors_vector.push_back({{ *{name} }});\n"
                     )
                     layout_autotune_optional_list.append(
-                        f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+                        f"auto new_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
                     )
                 else:
                     if is_inplaced and forward_inplace_map and name in forward_inplace_map.keys(
                     ):
                         arg_str = f"paddle::experimental::Tensor& {name}"
                         amp_tensors_vector_list.append(f"{{{name}}}")
                         amp_autocast_list.append(
-                            f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                            f"auto new_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                         )
                     else:
                         arg_str = f"const paddle::experimental::Tensor& {name}"
                         amp_tensors_vector_list.append(f"{{{name}}}")
                         amp_autocast_list.append(
-                            f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                            f"auto new_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                         )
                     layout_autotune_list.append(
-                        f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+                        f"auto new_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
                     )
             else:
                 assert IsVectorTensorType(ttype)
@@ -1083,10 +1175,10 @@ def GenerateForwardDefinitionAndDeclaration(self, is_inplaced):
                         f"if ({name}) amp_tensors_vector.push_back( *{name} );\n"
                     )
                     amp_autocast_optional_list.append(
-                        f"auto NEW_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                        f"auto new_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                     )
                     layout_autotune_optional_list.append(
-                        f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+                        f"auto new_{name} = transformer->TransInTensors(\"{name}\", {name});\n"
                     )
                 else:
                     if is_inplaced and forward_inplace_map and name in forward_inplace_map.keys(
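The TransInTensor → TransInTensors rename in these two hunks matters because vector inputs go through a different transformer entry point than plain tensors; a sketch of the distinction in the generated code (variable names hypothetical):

```cpp
// Plain paddle::experimental::Tensor input:
auto new_x = transformer->TransInTensor("x", x);

// std::vector<paddle::experimental::Tensor> input uses the plural overload:
auto new_xs = transformer->TransInTensors("xs", xs);
```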
@@ -1096,60 +1188,15 @@ def GenerateForwardDefinitionAndDeclaration(self, is_inplaced):
                         arg_str = f"const std::vector<paddle::experimental::Tensor>& {name}"
                         amp_tensors_vector_list.append(f"{name}")
                         amp_autocast_list.append(
-                            f"auto NEW_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                            f"auto new_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                         )
                         layout_autotune_list.append(
-                            f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+                            f"auto new_{name} = transformer->TransInTensors(\"{name}\", {name});\n"
                         )
 
             inputs_args_definition_list[pos] = arg_str
             inputs_args_declaration_list[pos] = arg_str
 
-        # for layout autotune attr
-        lightly_sensitive_attr = [
-            'axis', 'axes', 'dim', 'dims', 'start', 'end', 'stop'
-        ]
-        heavily_sensitive_attr = ['data_format', 'data_layout']
-        layout_autotune_attr = []
-        layout_autotune_attr_code_list = []
-        layout_autotune_attr_type_list = []
-        layout_autotune_attr_code_list.append(
-            f"auto op_name = phi::TransToFluidOpName(\"{forward_api_name}\");\n"
-        )
-
-        lightly_flag = False
-        heavily_flag = False
-        for name, atype, default_val, pos in forward_attrs_list:
-            for attr_name in lightly_sensitive_attr:
-                if name.find(
-                        attr_name) != -1 and name not in layout_autotune_attr:
-                    lightly_flag = True
-                    layout_autotune_attr.append(name)
-                    layout_autotune_attr_type_list.append(atype)
-            if lightly_flag is False:
-                for attr_name in heavily_sensitive_attr:
-                    if name.find(attr_name
-                                 ) != -1 and name not in layout_autotune_attr:
-                        layout_autotune_attr.append(name)
-                        layout_autotune_attr_type_list.append(atype)
-                        heavily_flag = True
-        if len(layout_autotune_attr) == 0:
-            layout_autotune_attr_code_list.append(
-                f"auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector);\n"
-            )
-        elif len(layout_autotune_attr) == 1:
-            layout_autotune_attr_code_list.append(
-                f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}>(op_name, tensors_vector, &{layout_autotune_attr[0]});\n"
-            )
-        elif len(layout_autotune_attr) == 2:
-            layout_autotune_attr_code_list.append(
-                f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}, {layout_autotune_attr_type_list[1]}>(op_name, tensors_vector, &{layout_autotune_attr[0]}, &{layout_autotune_attr[1]});\n"
-            )
-        else:
-            layout_autotune_attr_code_list.append(
-                f"auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector, {len(layout_autotune_attr)});\n"
-            )
-
         # forward attrs
         for name, atype, default_val, pos in forward_attrs_list:
             inputs_call_list[pos] = name
@@ -1339,33 +1386,12 @@ def GenerateForwardDefinitionAndDeclaration(self, is_inplaced):
             amp_autocast_list_str, amp_call_str)
 
         # Forward layout autotune
-        layout_inputs_call_args_str = amp_inputs_call_args_str
-        layout_tmp_result_list = []
-        layout_autotune_outs_list = ""
-        if num_outputs == 1:
-            layout_autotune_outs_list += f"{indent}auto {returns_str} = api_result;\n"
-            layout_autotune_outs_list += f"{indent}transformer -> SetOutTensorLayout(&{returns_str});\n"
-        else:
-            for name, (rtype, pos) in forward_outputs_position_map.items():
-                if name in intermediate_outputs:
-                    continue
-                layout_autotune_outs_list += f"{indent}auto& {name} = std::get<{len(layout_tmp_result_list)}>(api_result);\n"
-                layout_autotune_outs_list += f"{indent}transformer -> SetOutTensorLayout(&{name});\n"
-                layout_tmp_result_list.append(f"{name}")
-
-        if returns_type_str == "paddle::experimental::Tensor&" or forward_api_name == "slice" or forward_api_name == "strided_slice" or len(
-                layout_autotune_attr) == 0:
-            layout_logic_str = ""
-        else:
-            # after_call_str = f"return {forward_ad_function_name}({layout_inputs_call_args_str});\n"
-            after_call_str = f"auto api_result = {forward_ad_function_name}({layout_inputs_call_args_str});\n"
-            layout_logic_str = LAYOUT_LOGIC_TEMPLATE.format(
-                amp_tensors_vector_list_str,
-                " ".join(layout_tensors_vector_optional_list),
-                " ".join(layout_autotune_attr_code_list) + " " +
-                " ".join(layout_autotune_list) +
-                " ".join(layout_autotune_optional_list), after_call_str,
-                layout_autotune_outs_list, returns_str)
+        layout_autotune_list_str = " ".join(
+            layout_autotune_list) + " ".join(layout_autotune_optional_list)
+        layout_logic_str = self.GenerateForwardLayoutAutotune(
+            forward_api_name, amp_tensors_vector_list,
+            layout_tensors_vector_optional_list, layout_autotune_list_str,
+            returns_type_str, returns_str, amp_inputs_call_args_str)
 
         # For inputs outputs prepare for logging
         var_str = f"\n{indent} std::string input_str = \"\";"