From af86c51ac173717ccbf771a953b54466270123a5 Mon Sep 17 00:00:00 2001 From: Wang Zhen Date: Thu, 28 Oct 2021 11:29:42 +0000 Subject: [PATCH 01/14] Update UT test_parallel_executor_run_cinn.py. --- .../test_parallel_executor_run_cinn.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py index 601da32cfb129..f3e2bff332a05 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py @@ -75,7 +75,7 @@ def build_program(main_program, startup_program): return img, label, avg_loss -def do_test(dot_save_dir): +def train(dot_save_dir, prefix): startup_program = paddle.static.Program() main_program = paddle.static.Program() img, label, loss = build_program(main_program, startup_program) @@ -86,32 +86,35 @@ def do_test(dot_save_dir): exe.run(startup_program) build_strategy = paddle.static.BuildStrategy() - build_strategy.debug_graphviz_path = os.path.join(dot_save_dir, "viz") + build_strategy.debug_graphviz_path = os.path.join(dot_save_dir, prefix) compiled_program = paddle.static.CompiledProgram( main_program, build_strategy).with_data_parallel(loss_name=loss.name) - iters = 1 + iters = 100 feed = rand_data(img.name, label.name, iters) + loss_values = [] for step in range(iters): loss_v = exe.run(compiled_program, feed=feed[step], fetch_list=[loss], return_merged=False) - logger.info("loss value = {}".format(loss_v)) + loss_values.append(loss_v[0][0][0]) + return loss_values @unittest.skipIf(not set_cinn_flag(True), "Paddle is not compiled with CINN.") class TestParallelExecutorRunCinn(unittest.TestCase): def setUp(self): - set_cinn_flag(True) self.tmpdir = tempfile.mkdtemp(prefix="dots_") def tearDown(self): - set_cinn_flag(False) shutil.rmtree(self.tmpdir) def test_run_with_cinn(self): - 
do_test(self.tmpdir) + cinn_losses = train(self.tmpdir, "paddle") + set_cinn_flag(False) + pd_losses = train(self.tmpdir, "cinn") + np.allclose(cinn_losses, pd_losses) if __name__ == '__main__': From 1d13d386f0f90ad9be6c6ea16d1c734ff3cfae6c Mon Sep 17 00:00:00 2001 From: Wang Zhen Date: Mon, 1 Nov 2021 13:27:37 +0000 Subject: [PATCH 02/14] Add FLAGS_allow_cinn_ops & FLAGS_deny_cinn_ops & FLAGS_cinn_ops_delim. --- .../framework/paddle2cinn/build_cinn_pass.cc | 30 +++++- .../paddle2cinn/cinn_compiler_test.cc | 99 +++++++++++++++---- paddle/fluid/platform/flags.cc | 36 ++++++- .../test_parallel_executor_run_cinn.py | 2 +- 4 files changed, 145 insertions(+), 22 deletions(-) diff --git a/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc b/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc index 173ba55fd9d1a..c8eea6c15482e 100644 --- a/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc +++ b/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc @@ -33,6 +33,11 @@ limitations under the License. */ #include "paddle/fluid/framework/paddle2cinn/cinn_compiler.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/errors.h" +#include "paddle/fluid/string/string_helper.h" + +DECLARE_string(allow_cinn_ops); +DECLARE_string(deny_cinn_ops); +DECLARE_string(cinn_ops_delim); namespace paddle { namespace framework { @@ -340,8 +345,29 @@ void ReplaceSubGraphWithCinnOpNode(const GraphNodeSet& cluster, // to check whether the op node supported by CINN. 
void SearchAllSubgraphs(Graph* graph) { auto teller = [](const Node* node) { - return ::cinn::frontend::OpMapperRegistry::Global()->Find(node->Name()) != - nullptr; + bool registered = ::cinn::frontend::OpMapperRegistry::Global()->Find( + node->Name()) != nullptr; + // if the op type is registered in CINN and allow_ops is not empty, return + // true only when it is in allow_ops + auto allow_ops = + string::split_string(FLAGS_allow_cinn_ops, FLAGS_cinn_ops_delim); + if (allow_ops.size()) { + return registered && + std::find(allow_ops.begin(), allow_ops.end(), node->Name()) != + allow_ops.end(); + } + // if the op type is registered in CINN and deny_ops is not empty, return + // true only when it is not in deny_ops + auto deny_ops = + string::split_string(FLAGS_deny_cinn_ops, FLAGS_cinn_ops_delim); + if (deny_ops.size()) { + return registered && + std::find(deny_ops.begin(), deny_ops.end(), node->Name()) == + deny_ops.end(); + } + // if the user doesn't set FLAGS_allow_cinn_ops and FLAGS_deny_cinn_ops, + // return true only when it is registered in CINN + return registered; }; std::vector clusters = framework::ir::SubgraphDetector(graph, teller)(); diff --git a/paddle/fluid/framework/paddle2cinn/cinn_compiler_test.cc b/paddle/fluid/framework/paddle2cinn/cinn_compiler_test.cc index 22792e0f8c359..3aa2626fe06b4 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_compiler_test.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler_test.cc @@ -14,12 +14,18 @@ #include "paddle/fluid/framework/paddle2cinn/cinn_compiler.h" +#include #include #include +#include #include +#include +#include +#include #include "cinn/common/target.h" #include "gtest/gtest.h" +#include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/pass.h" #include "paddle/fluid/framework/lod_tensor.h" @@ -32,10 +38,59 @@ namespace paddle { namespace framework { namespace paddle2cinn { - using ir::Graph; using ::cinn::common::Target; 
+namespace { +template > +std::ostream& operator<<(std::ostream& os, const std::vector& vec) { + os << "{ "; + for (auto e : vec) { + os << e << " "; + } + os << "}\n"; + return os; +} + +// Get compilation_key values +std::vector GetCompilationKeys(const Graph& graph) { + std::vector compilation_keys; + for (auto& node : graph.Nodes()) { + if (node->IsOp() && node->Name() == kCinnLaunchOp) { + compilation_keys.emplace_back( + BOOST_GET_CONST(std::string, node->Op()->GetAttr(kCompilationKey))); + } + } + return compilation_keys; +} + +// Get inputs info +std::unordered_map> GetInputsInfo( + const std::string& key, const Graph& graph) { + std::unordered_set inputs; + for (auto& node : graph.Nodes()) { + if (node->IsOp() && node->Name() == kCinnLaunchOp) { + if (BOOST_GET_CONST(std::string, node->Op()->GetAttr(kCompilationKey)) != + key) { + continue; + } + for (auto in_var_name : node->Op()->InputArgumentNames()) { + VLOG(4) << "get an input name: " << in_var_name; + inputs.emplace(in_var_name); + } + } + } + + std::unordered_map> inputs_info; + for (auto& node : graph.Nodes()) { + if (node->IsVar() && inputs.count(node->Name())) { + VLOG(4) << node->Name() << " : " << node->Var()->GetShape(); + inputs_info.emplace(node->Name(), node->Var()->GetShape()); + } + } + return inputs_info; +} + // X - // | -> mul -> MUL_OUT - // Y - | -> elementwise_add -> ADD_OUT -> relu -> RELU_OUT @@ -65,6 +120,9 @@ std::unique_ptr CreateGraph() { auto* mul_out = global_block->Var("MUL_OUT"); mul_out->SetType(proto::VarType::LOD_TENSOR); + mul_out->SetLoDLevel(0); + mul_out->SetDataType(proto::VarType::FP32); + mul_out->SetShape({1000, 100}); mul_op->SetOutput("Out", {mul_out->Name()}); // add @@ -83,6 +141,9 @@ std::unique_ptr CreateGraph() { auto* add_out = global_block->Var("ADD_OUT"); add_out->SetType(proto::VarType::LOD_TENSOR); + add_out->SetLoDLevel(0); + add_out->SetDataType(proto::VarType::FP32); + add_out->SetShape({1000, 100}); add_op->SetOutput("Out", {add_out->Name()}); // 
relu @@ -92,11 +153,16 @@ std::unique_ptr CreateGraph() { auto* relu_out = global_block->Var("RELU_OUT"); relu_out->SetType(proto::VarType::LOD_TENSOR); + relu_out->SetLoDLevel(0); + relu_out->SetDataType(proto::VarType::FP32); + relu_out->SetShape({1000, 100}); relu_op->SetOutput("Out", {relu_out->Name()}); program.Flush(); return std::make_unique(program); } +} // namespace + TEST(CinnCompilerTest, Compile) { auto viz_pass = ir::PassRegistry::Instance().Get("graph_viz_pass"); auto cinn_pass = ir::PassRegistry::Instance().Get("build_cinn_pass"); @@ -113,32 +179,29 @@ TEST(CinnCompilerTest, Compile) { cinn_pass->Apply(graph.get()); viz_graph("processed_graph.dot", graph.get()); // get the compilation_key - std::vector compilation_keys; - for (auto& node : graph->Nodes()) { - if (node->IsOp() && node->Name() == kCinnLaunchOp) { - compilation_keys.emplace_back( - BOOST_GET_CONST(std::string, node->Op()->GetAttr(kCompilationKey))); - } - } + auto compilation_keys = GetCompilationKeys(*graph); ASSERT_EQ(compilation_keys.size(), 1); const auto& compilation_key = compilation_keys[0]; auto* cinn_compiler = CinnCompiler::GetInstance(); const auto& compiling_graph = cinn_compiler->FindGraph(compilation_key); - // viz_graph("compiling_graph.dot", const_cast(&compiling_graph)); + viz_graph("compiling_graph.dot", const_cast(&compiling_graph)); EXPECT_THROW(cinn_compiler->FindGraph("no_existed"), paddle::platform::EnforceNotMet); - LoDTensor tensor1, tensor2, tensor3; - tensor1.Resize({1000, 784}); - tensor2.Resize({784, 100}); - tensor3.Resize({100}); - tensor1.mutable_data(platform::CPUPlace()); - tensor2.mutable_data(platform::CPUPlace()); - tensor3.mutable_data(platform::CPUPlace()); - std::map input_tensors = { - {"X", &tensor1}, {"Y", &tensor2}, {"Z", &tensor3}}; + auto inputs_info = GetInputsInfo(compilation_key, *graph); + std::unordered_map create_inputs; + for (const auto& pair : inputs_info) { + auto& tensor = create_inputs[pair.first]; + 
tensor.Resize(make_ddim(pair.second)); + tensor.mutable_data(platform::CPUPlace()); + } + std::map input_tensors; + std::for_each(create_inputs.begin(), create_inputs.end(), + [&input_tensors](const auto& val) { + input_tensors.emplace(val.first, &val.second); + }); auto compile_fn = [&](const Target& target) { const auto& compiled_obj = diff --git a/paddle/fluid/platform/flags.cc b/paddle/fluid/platform/flags.cc index ef908be8462ed..e560f950bee17 100644 --- a/paddle/fluid/platform/flags.cc +++ b/paddle/fluid/platform/flags.cc @@ -698,6 +698,7 @@ PADDLE_DEFINE_EXPORTED_bool(allreduce_record_one_event, false, "events. Currently, only fuse allreduce supports " "this. Otherwise, the precision may be wrong."); +#ifdef PADDLE_WITH_CINN /* * CINN related FLAG * Name: FLAGS_use_cinn @@ -705,9 +706,42 @@ PADDLE_DEFINE_EXPORTED_bool(allreduce_record_one_event, false, * Value Range: bool, default=false * Example: FLAGS_use_cinn=true would run PaddlePaddle using CINN */ -#ifdef PADDLE_WITH_CINN PADDLE_DEFINE_EXPORTED_bool( use_cinn, false, "It controls whether to run PaddlePaddle using CINN"); + +/* + * CINN related FLAG + * Name: FLAGS_allow_cinn_ops + * Since Version: 2.3 + * Value Range: string, default="" + * Example: FLAGS_allow_cinn_ops="mul;relu" would only cover `mul` and `relu` + * when using CINN + */ +PADDLE_DEFINE_EXPORTED_string(allow_cinn_ops, "", + "It controls the cinn op subset to be used, " + "which has the highest priority."); + +/* + * CINN related FLAG + * Name: FLAGS_deny_cinn_ops + * Since Version: 2.3 + * Value Range: string, default="" + * Example: FLAGS_deny_cinn_ops="mul" would block `mul` op when using CINN + */ +PADDLE_DEFINE_EXPORTED_string(deny_cinn_ops, "", + "It controls the cinn op subset to be not used."); + +/* + * CINN related FLAG + * Name: FLAGS_cinn_ops_delim + * Since Version: 2.3 + * Value Range: string, default=";" + * Example: FLAGS_cinn_ops_delim=";" would use `;` to split the two lists + * (FLAGS_allow_cinn_ops & 
FLAGS_deny_cinn_ops) + */ +PADDLE_DEFINE_EXPORTED_string(cinn_ops_delim, ";", + "The delim of cinn ops used in " + "FLAGS_allow_cinn_ops & FLAGS_deny_cinn_ops."); #endif DEFINE_int32(record_pool_max_size, 2000000, diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py index f3e2bff332a05..8697883ac88a8 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py @@ -114,7 +114,7 @@ def test_run_with_cinn(self): cinn_losses = train(self.tmpdir, "paddle") set_cinn_flag(False) pd_losses = train(self.tmpdir, "cinn") - np.allclose(cinn_losses, pd_losses) + self.assertTrue(np.allclose(cinn_losses, pd_losses, atol=1e-5)) if __name__ == '__main__': From 10b3b5da55f987e4859db7963a7957c90f2097a5 Mon Sep 17 00:00:00 2001 From: Wang Zhen Date: Tue, 2 Nov 2021 03:46:42 +0000 Subject: [PATCH 03/14] Use the custom StringSplit function and remove the FLAGS_cinn_ops_delim flag. --- .../framework/paddle2cinn/build_cinn_pass.cc | 30 ++++++++++++------- paddle/fluid/platform/flags.cc | 12 -------- 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc b/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc index c8eea6c15482e..866daa66ad533 100644 --- a/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc +++ b/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include +#include #include #include #include @@ -37,7 +38,6 @@ limitations under the License. 
*/ DECLARE_string(allow_cinn_ops); DECLARE_string(deny_cinn_ops); -DECLARE_string(cinn_ops_delim); namespace paddle { namespace framework { @@ -51,6 +51,20 @@ using GraphNodeSet = std::unordered_set; using GraphNodeMap = std::unordered_map; namespace { +// The delim(`;`) that is used to split the FLAGS_allow_cinn_ops +// & FLAGS_deny_cinn_ops. +constexpr char kDelim[] = ";"; + +std::unordered_set StringSplit(const std::string& str, + const std::string& delim) { + std::regex reg(delim); + std::unordered_set elems{ + std::sregex_token_iterator(str.begin(), str.end(), reg, -1), + std::sregex_token_iterator()}; + elems.erase(""); + return elems; +} + int ExtractOpRole(const GraphNodeSet& cluster) { std::unordered_set op_roles; std::string attr_name = OpProtoAndCheckerMaker::OpRoleAttrName(); @@ -349,21 +363,15 @@ void SearchAllSubgraphs(Graph* graph) { node->Name()) != nullptr; // if the op type is registered in CINN and allow_ops is not empty, return // true only when it is in allow_ops - auto allow_ops = - string::split_string(FLAGS_allow_cinn_ops, FLAGS_cinn_ops_delim); + auto allow_ops = StringSplit(FLAGS_allow_cinn_ops, kDelim); if (allow_ops.size()) { - return registered && - std::find(allow_ops.begin(), allow_ops.end(), node->Name()) != - allow_ops.end(); + return registered && allow_ops.count(node->Name()); } // if the op type is registered in CINN and deny_ops is not empty, return // true only when it is not in deny_ops - auto deny_ops = - string::split_string(FLAGS_deny_cinn_ops, FLAGS_cinn_ops_delim); + auto deny_ops = StringSplit(FLAGS_deny_cinn_ops, kDelim); if (deny_ops.size()) { - return registered && - std::find(deny_ops.begin(), deny_ops.end(), node->Name()) == - deny_ops.end(); + return registered && !deny_ops.count(node->Name()); } // if the user doesn't set FLAGS_allow_cinn_ops and FLAGS_deny_cinn_ops, // return true only when it is registered in CINN diff --git a/paddle/fluid/platform/flags.cc b/paddle/fluid/platform/flags.cc index 
8752f841fad0c..09b0500dd3641 100644 --- a/paddle/fluid/platform/flags.cc +++ b/paddle/fluid/platform/flags.cc @@ -742,18 +742,6 @@ PADDLE_DEFINE_EXPORTED_string(allow_cinn_ops, "", */ PADDLE_DEFINE_EXPORTED_string(deny_cinn_ops, "", "It controls the cinn op subset to be not used."); - -/* - * CINN related FLAG - * Name: FLAGS_cinn_ops_delim - * Since Version: 2.3 - * Value Range: string, default=";" - * Example: FLAGS_cinn_ops_delim=";" would use `;` to split the two lists - * (FLAGS_allow_cinn_ops & FLAGS_deny_cinn_ops) - */ -PADDLE_DEFINE_EXPORTED_string(cinn_ops_delim, ";", - "The delim of cinn ops used in " - "FLAGS_allow_cinn_ops & FLAGS_deny_cinn_ops."); #endif DEFINE_int32(record_pool_max_size, 2000000, From 91065d4dca98868f30a8327b3d6f2d3345aaa7cd Mon Sep 17 00:00:00 2001 From: Wang Zhen Date: Tue, 2 Nov 2021 11:21:58 +0000 Subject: [PATCH 04/14] Add FlagController test. --- .../framework/paddle2cinn/CMakeLists.txt | 2 +- .../framework/paddle2cinn/build_cinn_pass.cc | 7 ++- .../framework/paddle2cinn/cinn_compiler.cc | 45 +++++++++++--- .../framework/paddle2cinn/cinn_compiler.h | 19 +++++- .../paddle2cinn/cinn_compiler_test.cc | 60 +++++++++++++++++++ paddle/fluid/operators/cinn_launch_op.h | 3 +- 6 files changed, 122 insertions(+), 14 deletions(-) diff --git a/paddle/fluid/framework/paddle2cinn/CMakeLists.txt b/paddle/fluid/framework/paddle2cinn/CMakeLists.txt index e5dac1aa6292d..6eef1a00e1e73 100644 --- a/paddle/fluid/framework/paddle2cinn/CMakeLists.txt +++ b/paddle/fluid/framework/paddle2cinn/CMakeLists.txt @@ -2,7 +2,7 @@ cc_library(cinn_cache_key SRCS cinn_cache_key.cc DEPS boost graph graph_helper l cc_library(build_cinn_pass SRCS build_cinn_pass.cc DEPS pass subgraph_detector graph_pattern_detector cinn_compiler errors enforce) cc_library(transform_desc SRCS transform_desc.cc DEPS proto_desc cinn) cc_library(cinn_graph_symbolization SRCS cinn_graph_symbolization.cc DEPS lod_tensor graph transform_desc cinn) -cc_library(cinn_compiler SRCS 
cinn_compiler.cc DEPS graph lod_tensor cinn_cache_key cinn_graph_symbolization cinn) +cc_library(cinn_compiler SRCS cinn_compiler.cc DEPS framework_proto graph lod_tensor cinn_cache_key cinn_graph_symbolization cinn) cc_test(cinn_cache_key_test SRCS cinn_cache_key_test.cc DEPS cinn_cache_key) cc_test(build_cinn_pass_test SRCS build_cinn_pass_test.cc DEPS build_cinn_pass cinn_compiler) diff --git a/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc b/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc index 866daa66ad533..37f1c02c3aebd 100644 --- a/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc +++ b/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc @@ -26,6 +26,8 @@ limitations under the License. */ #include "cinn/frontend/op_mapper_registry.h" #include "cinn/frontend/op_mappers/use_op_mappers.h" +#include "gflags/gflags.h" +#include "glog/logging.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/ir/node.h" @@ -34,7 +36,6 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/paddle2cinn/cinn_compiler.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/errors.h" -#include "paddle/fluid/string/string_helper.h" DECLARE_string(allow_cinn_ops); DECLARE_string(deny_cinn_ops); @@ -377,6 +378,8 @@ void SearchAllSubgraphs(Graph* graph) { // return true only when it is registered in CINN return registered; }; + VLOG(4) << "The allowed Cinn Ops: " << FLAGS_allow_cinn_ops; + VLOG(4) << "The denied Cinn Ops: " << FLAGS_deny_cinn_ops; std::vector clusters = framework::ir::SubgraphDetector(graph, teller)(); @@ -409,7 +412,7 @@ void SearchAllSubgraphs(Graph* graph) { // save it in CinnCompiler std::string compilation_key = cinn_compiler->AddGraph(CreateNewSubGraph( cluster_set, cluster_internals, cluster_inputs, cluster_outputs)); - VLOG(4) << "Compilation Key: " << compilation_key; + VLOG(4) << "Compilation Key:\n" << ReadableProtoStr(compilation_key); // Replace the found cluster to a new cinn op node ReplaceSubGraphWithCinnOpNode(cluster_set, cluster_inputs, cluster_outputs, diff --git a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc index bcff92ec18eda..59e9f25a4270e 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc @@ -29,11 +29,13 @@ #include "cinn/hlir/framework/graph_compiler.h" #include "cinn/hlir/framework/pass.h" #include "cinn/hlir/pass/use_pass.h" +#include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.h" #include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/framework/rw_lock.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/platform/enforce.h" @@ -59,22 +61,32 @@ std::string 
CinnCompiler::AddGraph(std::unique_ptr graph) { ProgramDesc program; GraphToProgram(*graph, &program); program.Proto()->SerializeToString(&graph_key); - if (!graphs_.count(graph_key)) { + VLOG(4) << "Add a graph into CinnCompiler, which is:\n" + << ReadableProtoStr(graph_key); + bool exist = false; + { + AutoRDLock r_guard{&rwlock_}; + exist = graphs_.count(graph_key) != 0; + } + if (!exist) { + AutoWRLock w_guard{&rwlock_}; graphs_[graph_key] = std::move(graph); } else { LOG(WARNING) << "The graph being added is already in CinnCompiler. Its key is:\n" - << graph_key; + << ReadableProtoStr(graph_key); } return graph_key; } const Graph& CinnCompiler::FindGraph(const std::string& graph_key) const { + AutoRDLock guard{&rwlock_}; PADDLE_ENFORCE_NE( graphs_.count(graph_key), 0, - platform::errors::InvalidArgument("Can not find the target graph: %s", - graph_key.c_str())); - return *graphs_.at(graph_key); + platform::errors::InvalidArgument("Can not find the target graph:\n%s", + ReadableProtoStr(graph_key).c_str())); + const auto& graph = *graphs_.at(graph_key); + return graph; } const CinnCompiledObject& CinnCompiler::Compile( @@ -82,17 +94,28 @@ const CinnCompiledObject& CinnCompiler::Compile( const std::map& input_tensors, const Target& target) { CinnCacheKey cur_key(graph, input_tensors, target.arch_str()); - if (!cache_.count(cur_key)) { + bool exist = false; + { + AutoRDLock r_guard{&rwlock_}; + exist = cache_.count(cur_key) != 0; + } + if (!exist) { + auto compiled_res = CompileGraph(graph, input_tensors, target); + AutoWRLock w_guard{&rwlock_}; real_compiled_num_++; - cache_[cur_key] = CompileGraph(graph, input_tensors, target); + cache_[cur_key] = std::move(compiled_res); } - return *cache_[cur_key]; + AutoRDLock guard{&rwlock_}; + const auto& cached_boj = *cache_[cur_key]; + return cached_boj; } const CinnCompiledObject& CinnCompiler::Compile( const std::string& compilation_key, const std::map& input_tensors, const Target& target) { + VLOG(4) << "The graph to 
be compiled is:\n" + << ReadableProtoStr(compilation_key); const auto& graph = FindGraph(compilation_key); return Compile(graph, input_tensors, target); } @@ -125,6 +148,12 @@ std::unique_ptr CinnCompiler::CompileGraph( return compiled_obj; } +std::string ReadableProtoStr(const std::string& bytes) { + proto::ProgramDesc program_desc; + program_desc.ParseFromString(bytes); + return program_desc.DebugString(); +} + } // namespace paddle2cinn } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/paddle2cinn/cinn_compiler.h b/paddle/fluid/framework/paddle2cinn/cinn_compiler.h index 0d6935849696b..3a623fe3629ba 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_compiler.h +++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -25,6 +26,7 @@ #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/paddle2cinn/cinn_cache_key.h" +#include "paddle/fluid/framework/rw_lock.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/platform/macros.h" @@ -64,7 +66,17 @@ class CinnCompiler { const ir::Graph& FindGraph(const std::string& key) const; - std::int64_t real_compiled_num() const { return real_compiled_num_; } + void Clear() { + AutoWRLock guard{&rwlock_}; + graphs_.clear(); + cache_.clear(); + real_compiled_num_ = 0; + } + + std::int64_t real_compiled_num() const { + AutoRDLock guard{&rwlock_}; + return real_compiled_num_; + } ~CinnCompiler() = default; @@ -79,11 +91,14 @@ class CinnCompiler { std::unordered_map, CinnCacheKey::Hash> cache_; - std::atomic_int64_t real_compiled_num_{0}; + std::int64_t real_compiled_num_{0}; + mutable RWLock rwlock_; DISABLE_COPY_AND_ASSIGN(CinnCompiler); }; +extern std::string ReadableProtoStr(const std::string& bytes); + } // namespace paddle2cinn } // namespace framework } // namespace paddle diff --git 
a/paddle/fluid/framework/paddle2cinn/cinn_compiler_test.cc b/paddle/fluid/framework/paddle2cinn/cinn_compiler_test.cc index 3aa2626fe06b4..6e04067396dd6 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_compiler_test.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler_test.cc @@ -24,6 +24,8 @@ #include #include "cinn/common/target.h" +#include "gflags/gflags.h" +#include "glog/logging.h" #include "gtest/gtest.h" #include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/framework/ir/graph.h" @@ -35,6 +37,9 @@ #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/place.h" +DECLARE_string(allow_cinn_ops); +DECLARE_string(deny_cinn_ops); + namespace paddle { namespace framework { namespace paddle2cinn { @@ -64,6 +69,17 @@ std::vector GetCompilationKeys(const Graph& graph) { return compilation_keys; } +// Extract op types from a graph +std::unordered_set ExtractOpTypes(const Graph& graph) { + std::unordered_set op_types; + for (auto& node : graph.Nodes()) { + if (node->IsOp()) { + op_types.emplace(node->Name()); + } + } + return op_types; +} + // Get inputs info std::unordered_map> GetInputsInfo( const std::string& key, const Graph& graph) { @@ -163,6 +179,49 @@ std::unique_ptr CreateGraph() { } // namespace +TEST(CinnCompilerTest, FlagController) { + // init + auto* cinn_compiler = CinnCompiler::GetInstance(); + auto cinn_pass = ir::PassRegistry::Instance().Get("build_cinn_pass"); + // apply build_cinn_pass & FLAGS_allow_cinn_ops="add" + { + FLAGS_allow_cinn_ops = "add"; + auto graph = CreateGraph(); + cinn_compiler->Clear(); + cinn_pass->Apply(graph.get()); + auto compilation_keys = GetCompilationKeys(*graph); + ASSERT_EQ(compilation_keys.size(), 0); + } + // apply build_cinn_pass & FLAGS_allow_cinn_ops="mul;relu" + { + FLAGS_allow_cinn_ops = "mul;relu"; + auto graph = CreateGraph(); + cinn_compiler->Clear(); + cinn_pass->Apply(graph.get()); + auto compilation_keys = GetCompilationKeys(*graph); + ASSERT_EQ(compilation_keys.size(), 
2); + } + // apply build_cinn_pass & FLAGS_allow_cinn_ops="" & + // FLAGS_deny_cinn_ops="relu" + { + FLAGS_allow_cinn_ops = ""; + FLAGS_deny_cinn_ops = "elementwise_add;relu"; + auto graph = CreateGraph(); + cinn_compiler->Clear(); + cinn_pass->Apply(graph.get()); + auto compilation_keys = GetCompilationKeys(*graph); + ASSERT_EQ(compilation_keys.size(), 1); + const auto& compiling_graph = cinn_compiler->FindGraph(compilation_keys[0]); + auto op_types = ExtractOpTypes(compiling_graph); + ASSERT_EQ(op_types.size(), 2); + ASSERT_EQ(op_types.count("feed"), 1); + ASSERT_EQ(op_types.count("mul"), 1); + } + // recover flags + FLAGS_allow_cinn_ops = ""; + FLAGS_deny_cinn_ops = ""; +} + TEST(CinnCompilerTest, Compile) { auto viz_pass = ir::PassRegistry::Instance().Get("graph_viz_pass"); auto cinn_pass = ir::PassRegistry::Instance().Get("build_cinn_pass"); @@ -183,6 +242,7 @@ TEST(CinnCompilerTest, Compile) { ASSERT_EQ(compilation_keys.size(), 1); const auto& compilation_key = compilation_keys[0]; + VLOG(4) << "Compilation Key:\n" << ReadableProtoStr(compilation_key); auto* cinn_compiler = CinnCompiler::GetInstance(); const auto& compiling_graph = cinn_compiler->FindGraph(compilation_key); viz_graph("compiling_graph.dot", const_cast(&compiling_graph)); diff --git a/paddle/fluid/operators/cinn_launch_op.h b/paddle/fluid/operators/cinn_launch_op.h index 250f4be669614..db59dccb9230b 100644 --- a/paddle/fluid/operators/cinn_launch_op.h +++ b/paddle/fluid/operators/cinn_launch_op.h @@ -51,7 +51,8 @@ class CinnLaunchOpKernel : public framework::OpKernel { kCompilationKey)); const auto& compilation_key = ctx.template Attr(kCompilationKey); - VLOG(2) << "CinnLaunchOp compilation_key:" << compilation_key; + VLOG(2) << "CinnLaunchOp compilation_key:\n" + << ReadableProtoStr(compilation_key); const auto& graph = CinnCompiler::GetInstance()->FindGraph(compilation_key); auto input_variable_names = ctx.InputNames(kX); From 6560f069250d593a6ad47daabaeb717d508ddcdb Mon Sep 17 00:00:00 2001 
From: Wang Zhen Date: Tue, 2 Nov 2021 11:26:23 +0000 Subject: [PATCH 05/14] Add more comments about cinn flags. --- .../fluid/framework/paddle2cinn/cinn_compiler.cc | 2 +- .../fluid/framework/paddle2cinn/cinn_compiler.h | 16 +++++++--------- paddle/fluid/platform/flags.cc | 3 ++- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc index 59e9f25a4270e..bc9cfae5801b1 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc @@ -100,9 +100,9 @@ const CinnCompiledObject& CinnCompiler::Compile( exist = cache_.count(cur_key) != 0; } if (!exist) { + real_compiled_num_++; auto compiled_res = CompileGraph(graph, input_tensors, target); AutoWRLock w_guard{&rwlock_}; - real_compiled_num_++; cache_[cur_key] = std::move(compiled_res); } AutoRDLock guard{&rwlock_}; diff --git a/paddle/fluid/framework/paddle2cinn/cinn_compiler.h b/paddle/fluid/framework/paddle2cinn/cinn_compiler.h index 3a623fe3629ba..b844e7d317818 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_compiler.h +++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler.h @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -67,16 +66,15 @@ class CinnCompiler { const ir::Graph& FindGraph(const std::string& key) const; void Clear() { - AutoWRLock guard{&rwlock_}; - graphs_.clear(); - cache_.clear(); + { + AutoWRLock guard{&rwlock_}; + graphs_.clear(); + cache_.clear(); + } real_compiled_num_ = 0; } - std::int64_t real_compiled_num() const { - AutoRDLock guard{&rwlock_}; - return real_compiled_num_; - } + std::int64_t real_compiled_num() const { return real_compiled_num_; } ~CinnCompiler() = default; @@ -91,7 +89,7 @@ class CinnCompiler { std::unordered_map, CinnCacheKey::Hash> cache_; - std::int64_t real_compiled_num_{0}; + std::atomic_int64_t real_compiled_num_{0}; mutable RWLock rwlock_; 
DISABLE_COPY_AND_ASSIGN(CinnCompiler); diff --git a/paddle/fluid/platform/flags.cc b/paddle/fluid/platform/flags.cc index 09b0500dd3641..a674a6a8acdf2 100644 --- a/paddle/fluid/platform/flags.cc +++ b/paddle/fluid/platform/flags.cc @@ -738,7 +738,8 @@ PADDLE_DEFINE_EXPORTED_string(allow_cinn_ops, "", * Name: FLAGS_deny_cinn_ops * Since Version: 2.3 * Value Range: string, default="" - * Example: FLAGS_deny_cinn_ops="mul" would block `mul` op when using CINN + * Example: FLAGS_deny_cinn_ops="mul;relu" would block `mul` and `relu` two ops + * when using CINN */ PADDLE_DEFINE_EXPORTED_string(deny_cinn_ops, "", "It controls the cinn op subset to be not used."); From a2d9398b4c7f5e924247b88baa9ab4f402da406d Mon Sep 17 00:00:00 2001 From: Wang Zhen Date: Tue, 2 Nov 2021 12:27:01 +0000 Subject: [PATCH 06/14] Update the lock logic. --- .../framework/paddle2cinn/cinn_compiler.cc | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc index bc9cfae5801b1..65a3c4cd178bd 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc @@ -63,18 +63,15 @@ std::string CinnCompiler::AddGraph(std::unique_ptr graph) { program.Proto()->SerializeToString(&graph_key); VLOG(4) << "Add a graph into CinnCompiler, which is:\n" << ReadableProtoStr(graph_key); - bool exist = false; { - AutoRDLock r_guard{&rwlock_}; - exist = graphs_.count(graph_key) != 0; - } - if (!exist) { - AutoWRLock w_guard{&rwlock_}; - graphs_[graph_key] = std::move(graph); - } else { - LOG(WARNING) - << "The graph being added is already in CinnCompiler. Its key is:\n" - << ReadableProtoStr(graph_key); + AutoWRLock guard{&rwlock_}; + if (!graphs_.count(graph_key)) { + graphs_[graph_key] = std::move(graph); + } else { + LOG(WARNING) + << "The graph being added is already in CinnCompiler. 
Its key is:\n" + << ReadableProtoStr(graph_key); + } } return graph_key; } @@ -103,7 +100,9 @@ const CinnCompiledObject& CinnCompiler::Compile( real_compiled_num_++; auto compiled_res = CompileGraph(graph, input_tensors, target); AutoWRLock w_guard{&rwlock_}; - cache_[cur_key] = std::move(compiled_res); + if (!cache_.count(cur_key)) { + cache_[cur_key] = std::move(compiled_res); + } } AutoRDLock guard{&rwlock_}; const auto& cached_boj = *cache_[cur_key]; From 607a3cf93035d6e87c570e4f1258550abd55b316 Mon Sep 17 00:00:00 2001 From: Wang Zhen Date: Tue, 2 Nov 2021 13:39:14 +0000 Subject: [PATCH 07/14] Apply lock to the cache_ only in CinnCompiler. --- .../framework/paddle2cinn/cinn_compiler.cc | 19 +++++++------------ paddle/fluid/operators/cinn_launch_op.h | 2 +- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc index 65a3c4cd178bd..0a81c7cdea1e9 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc @@ -63,27 +63,22 @@ std::string CinnCompiler::AddGraph(std::unique_ptr graph) { program.Proto()->SerializeToString(&graph_key); VLOG(4) << "Add a graph into CinnCompiler, which is:\n" << ReadableProtoStr(graph_key); - { - AutoWRLock guard{&rwlock_}; - if (!graphs_.count(graph_key)) { - graphs_[graph_key] = std::move(graph); - } else { - LOG(WARNING) - << "The graph being added is already in CinnCompiler. Its key is:\n" - << ReadableProtoStr(graph_key); - } + if (!graphs_.count(graph_key)) { + graphs_[graph_key] = std::move(graph); + } else { + LOG(WARNING) + << "The graph being added is already in CinnCompiler. 
Its key is:\n" + << ReadableProtoStr(graph_key); } return graph_key; } const Graph& CinnCompiler::FindGraph(const std::string& graph_key) const { - AutoRDLock guard{&rwlock_}; PADDLE_ENFORCE_NE( graphs_.count(graph_key), 0, platform::errors::InvalidArgument("Can not find the target graph:\n%s", ReadableProtoStr(graph_key).c_str())); - const auto& graph = *graphs_.at(graph_key); - return graph; + return *graphs_.at(graph_key); } const CinnCompiledObject& CinnCompiler::Compile( diff --git a/paddle/fluid/operators/cinn_launch_op.h b/paddle/fluid/operators/cinn_launch_op.h index db59dccb9230b..ffb41b0868e82 100644 --- a/paddle/fluid/operators/cinn_launch_op.h +++ b/paddle/fluid/operators/cinn_launch_op.h @@ -52,7 +52,7 @@ class CinnLaunchOpKernel : public framework::OpKernel { const auto& compilation_key = ctx.template Attr(kCompilationKey); VLOG(2) << "CinnLaunchOp compilation_key:\n" - << ReadableProtoStr(compilation_key); + << framework::paddle2cinn::ReadableProtoStr(compilation_key); const auto& graph = CinnCompiler::GetInstance()->FindGraph(compilation_key); auto input_variable_names = ctx.InputNames(kX); From 442422e33c3a387ea96dd093bb95b31f167641a5 Mon Sep 17 00:00:00 2001 From: Wang Zhen Date: Tue, 2 Nov 2021 14:04:22 +0000 Subject: [PATCH 08/14] Use PADDLE_ENFORCE instead in CinnCompiler::AddGraph. 
--- .../framework/paddle2cinn/cinn_compiler.cc | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc index 0a81c7cdea1e9..082e91b7daee5 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc @@ -63,21 +63,21 @@ std::string CinnCompiler::AddGraph(std::unique_ptr graph) { program.Proto()->SerializeToString(&graph_key); VLOG(4) << "Add a graph into CinnCompiler, which is:\n" << ReadableProtoStr(graph_key); - if (!graphs_.count(graph_key)) { - graphs_[graph_key] = std::move(graph); - } else { - LOG(WARNING) - << "The graph being added is already in CinnCompiler. Its key is:\n" - << ReadableProtoStr(graph_key); - } + + PADDLE_ENFORCE_EQ( + graphs_.count(graph_key), 0, + platform::errors::PreconditionNotMet( + "The graph to be added is already in CinnCompiler, which is:\n", + ReadableProtoStr(graph_key).c_str())); + graphs_[graph_key] = std::move(graph); return graph_key; } const Graph& CinnCompiler::FindGraph(const std::string& graph_key) const { - PADDLE_ENFORCE_NE( - graphs_.count(graph_key), 0, - platform::errors::InvalidArgument("Can not find the target graph:\n%s", - ReadableProtoStr(graph_key).c_str())); + PADDLE_ENFORCE_NE(graphs_.count(graph_key), 0, + platform::errors::PreconditionNotMet( + "Can not find the target graph:\n%s", + ReadableProtoStr(graph_key).c_str())); return *graphs_.at(graph_key); } From ee88c0646779cde873eea2321b962e25353e2378 Mon Sep 17 00:00:00 2001 From: Wang Zhen Date: Wed, 3 Nov 2021 03:38:52 +0000 Subject: [PATCH 09/14] Put the StringSplit operation out of the teller. 
--- paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc b/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc index 37f1c02c3aebd..416340cb31f8e 100644 --- a/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc +++ b/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc @@ -359,18 +359,18 @@ void ReplaceSubGraphWithCinnOpNode(const GraphNodeSet& cluster, // all of op node supported by CINN. We using OpMapperRegistry // to check whether the op node supported by CINN. void SearchAllSubgraphs(Graph* graph) { - auto teller = [](const Node* node) { + auto allow_ops = StringSplit(FLAGS_allow_cinn_ops, kDelim); + auto deny_ops = StringSplit(FLAGS_deny_cinn_ops, kDelim); + auto teller = [&allow_ops, &deny_ops](const Node* node) { bool registered = ::cinn::frontend::OpMapperRegistry::Global()->Find( node->Name()) != nullptr; // if the op type is registered in CINN and allow_ops is not empty, return // true only when it is in allow_ops - auto allow_ops = StringSplit(FLAGS_allow_cinn_ops, kDelim); if (allow_ops.size()) { return registered && allow_ops.count(node->Name()); } // if the op type is registered in CINN and deny_ops is not empty, return // true only when it is not in deny_ops - auto deny_ops = StringSplit(FLAGS_deny_cinn_ops, kDelim); if (deny_ops.size()) { return registered && !deny_ops.count(node->Name()); } From c517414f338522da2990ca71ef1c27c7a336562a Mon Sep 17 00:00:00 2001 From: Wang Zhen Date: Wed, 3 Nov 2021 06:53:27 +0000 Subject: [PATCH 10/14] Add VizGraph method for CinnCompiler. 
--- .../framework/paddle2cinn/build_cinn_pass.cc | 3 +- .../framework/paddle2cinn/cinn_compiler.cc | 123 +++++++++++++----- .../framework/paddle2cinn/cinn_compiler.h | 15 +-- .../paddle2cinn/cinn_compiler_test.cc | 3 +- paddle/fluid/operators/cinn_launch_op.h | 6 +- 5 files changed, 100 insertions(+), 50 deletions(-) diff --git a/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc b/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc index 416340cb31f8e..b90dbd7dcd845 100644 --- a/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc +++ b/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc @@ -412,7 +412,8 @@ void SearchAllSubgraphs(Graph* graph) { // save it in CinnCompiler std::string compilation_key = cinn_compiler->AddGraph(CreateNewSubGraph( cluster_set, cluster_internals, cluster_inputs, cluster_outputs)); - VLOG(4) << "Compilation Key:\n" << ReadableProtoStr(compilation_key); + VLOG(4) << "Compilation Key:\n" + << cinn_compiler->ReadableKey(compilation_key); // Replace the found cluster to a new cinn op node ReplaceSubGraphWithCinnOpNode(cluster_set, cluster_inputs, cluster_outputs, diff --git a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc index 082e91b7daee5..368c83b1039c8 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc @@ -14,14 +14,15 @@ #include "paddle/fluid/framework/paddle2cinn/cinn_compiler.h" +#include #include #include #include +#include #include "cinn/common/target.h" #include "cinn/common/type.h" #include "cinn/frontend/decomposer/use_decomposer.h" -#include "cinn/frontend/net_builder.h" // need to remove after #include "cinn/frontend/pass/use_program_pass.h" #include "cinn/frontend/program_pass.h" #include "cinn/frontend/syntax.h" @@ -32,18 +33,23 @@ #include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_helper.h" +#include 
"paddle/fluid/framework/ir/node.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/rw_lock.h" #include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/inference/analysis/dot.h" #include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/string/string_helper.h" namespace paddle { namespace framework { namespace paddle2cinn { using ir::Graph; +using ir::Node; +using inference::analysis::Dot; using ::cinn::common::Target; using ::cinn::common::Float; using ::cinn::hlir::framework::GraphCompiler; @@ -56,31 +62,6 @@ CinnCompiler* CinnCompiler::GetInstance() { return &instance; } -std::string CinnCompiler::AddGraph(std::unique_ptr graph) { - std::string graph_key; - ProgramDesc program; - GraphToProgram(*graph, &program); - program.Proto()->SerializeToString(&graph_key); - VLOG(4) << "Add a graph into CinnCompiler, which is:\n" - << ReadableProtoStr(graph_key); - - PADDLE_ENFORCE_EQ( - graphs_.count(graph_key), 0, - platform::errors::PreconditionNotMet( - "The graph to be added is already in CinnCompiler, which is:\n", - ReadableProtoStr(graph_key).c_str())); - graphs_[graph_key] = std::move(graph); - return graph_key; -} - -const Graph& CinnCompiler::FindGraph(const std::string& graph_key) const { - PADDLE_ENFORCE_NE(graphs_.count(graph_key), 0, - platform::errors::PreconditionNotMet( - "Can not find the target graph:\n%s", - ReadableProtoStr(graph_key).c_str())); - return *graphs_.at(graph_key); -} - const CinnCompiledObject& CinnCompiler::Compile( const Graph& graph, const std::map& input_tensors, @@ -108,12 +89,90 @@ const CinnCompiledObject& CinnCompiler::Compile( const std::string& compilation_key, const std::map& input_tensors, const Target& target) { - VLOG(4) << "The graph to be compiled is:\n" - << ReadableProtoStr(compilation_key); + VLOG(4) << "-- The graph to be compiled is:\n" << 
VizGraph(compilation_key); const auto& graph = FindGraph(compilation_key); return Compile(graph, input_tensors, target); } +std::string CinnCompiler::AddGraph(std::unique_ptr graph) { + std::string graph_key; + ProgramDesc program; + GraphToProgram(*graph, &program); + program.Proto()->SerializeToString(&graph_key); + + PADDLE_ENFORCE_EQ( + graphs_.count(graph_key), 0, + platform::errors::PreconditionNotMet( + "The graph to be added is already in CinnCompiler, which is:\n", + VizGraph(graph_key).c_str())); + graphs_[graph_key] = std::move(graph); + VLOG(4) << "-- Add a graph into CinnCompiler, which is:\n" + << VizGraph(graph_key); + return graph_key; +} + +const Graph& CinnCompiler::FindGraph(const std::string& graph_key) const { + PADDLE_ENFORCE_NE( + graphs_.count(graph_key), 0, + platform::errors::PreconditionNotMet( + "Can not find the target graph, of which the key is:\n%s", + ReadableKey(graph_key).c_str())); + return *graphs_.at(graph_key); +} + +std::string CinnCompiler::VizGraph(const std::string& key) const { + Dot dot; + std::unordered_map node2dot; + const Graph& graph = FindGraph(key); + int id = 0; + // Create nodes + for (const Node* n : graph.Nodes()) { + std::string node_id = "Node" + std::to_string(id++); + if (n->IsOp()) { + dot.AddNode(node_id, {Dot::Attr("shape", "box"), + Dot::Attr("style", "rounded,filled,bold")}, + n->Name()); + } else if (n->IsVar()) { + auto label = n->Name(); + if (n->Var() && n->Var()->GetType() == proto::VarType::LOD_TENSOR) { + auto shape = n->Var()->GetShape(); + std::vector shape_str(shape.size()); + std::transform(shape.begin(), shape.end(), shape_str.begin(), + [](const auto& val) { return std::to_string(val); }); + label += "\n" + string::join_strings(shape_str, ','); + } + dot.AddNode(node_id, + {Dot::Attr("shape", "ellipse"), Dot::Attr("style", "bold")}, + label); + } + node2dot[n] = node_id; + } + // Create edges + for (const Node* n : graph.Nodes()) { + const auto& src_id = node2dot.at(n); + for (auto* out : 
n->outputs) { + const auto& dest_id = node2dot.at(out); + dot.AddEdge(src_id, dest_id, {}); + } + } + return dot.Build(); +} + +std::string CinnCompiler::ReadableKey(const std::string& key) const { + proto::ProgramDesc desc; + desc.ParseFromString(key); + return desc.DebugString(); +} + +void CinnCompiler::Clear() { + { + AutoWRLock guard{&rwlock_}; + graphs_.clear(); + cache_.clear(); + } + real_compiled_num_ = 0; +} + std::unique_ptr CinnCompiler::CompileGraph( const ir::Graph& graph, const std::map& input_tensors, @@ -124,7 +183,7 @@ std::unique_ptr CinnCompiler::CompileGraph( ProgramPass::Apply(&frontend_program, target, {"Decomposer"}); auto cinn_graph = std::make_shared<::cinn::hlir::framework::Graph>( frontend_program, target); - VLOG(4) << "The " << real_compiled_num_ << "-th compilation (" + VLOG(4) << "-- The " << real_compiled_num_ << "-th compilation (" << target.arch_str() << "), and its related graph:\n" << cinn_graph->Visualize(); ApplyPass(cinn_graph.get(), "OpFusion"); @@ -142,12 +201,6 @@ std::unique_ptr CinnCompiler::CompileGraph( return compiled_obj; } -std::string ReadableProtoStr(const std::string& bytes) { - proto::ProgramDesc program_desc; - program_desc.ParseFromString(bytes); - return program_desc.DebugString(); -} - } // namespace paddle2cinn } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/paddle2cinn/cinn_compiler.h b/paddle/fluid/framework/paddle2cinn/cinn_compiler.h index b844e7d317818..3996c62cb943e 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_compiler.h +++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler.h @@ -65,14 +65,11 @@ class CinnCompiler { const ir::Graph& FindGraph(const std::string& key) const; - void Clear() { - { - AutoWRLock guard{&rwlock_}; - graphs_.clear(); - cache_.clear(); - } - real_compiled_num_ = 0; - } + std::string VizGraph(const std::string& key) const; + + std::string ReadableKey(const std::string& key) const; + + void Clear(); std::int64_t real_compiled_num() 
const { return real_compiled_num_; } @@ -95,8 +92,6 @@ class CinnCompiler { DISABLE_COPY_AND_ASSIGN(CinnCompiler); }; -extern std::string ReadableProtoStr(const std::string& bytes); - } // namespace paddle2cinn } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/paddle2cinn/cinn_compiler_test.cc b/paddle/fluid/framework/paddle2cinn/cinn_compiler_test.cc index 6e04067396dd6..145d3d83d4509 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_compiler_test.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler_test.cc @@ -242,8 +242,9 @@ TEST(CinnCompilerTest, Compile) { ASSERT_EQ(compilation_keys.size(), 1); const auto& compilation_key = compilation_keys[0]; - VLOG(4) << "Compilation Key:\n" << ReadableProtoStr(compilation_key); auto* cinn_compiler = CinnCompiler::GetInstance(); + VLOG(4) << "The graph to be compiled:\n" + << cinn_compiler->VizGraph(compilation_key); const auto& compiling_graph = cinn_compiler->FindGraph(compilation_key); viz_graph("compiling_graph.dot", const_cast(&compiling_graph)); diff --git a/paddle/fluid/operators/cinn_launch_op.h b/paddle/fluid/operators/cinn_launch_op.h index 43d5a93383a7c..858baffcac358 100644 --- a/paddle/fluid/operators/cinn_launch_op.h +++ b/paddle/fluid/operators/cinn_launch_op.h @@ -97,9 +97,9 @@ class CinnLaunchOpKernel : public framework::OpKernel { kCompilationKey)); const auto& compilation_key = ctx.template Attr(kCompilationKey); - VLOG(4) << "The value of CinnLaunchOp attribute(" << kCompilationKey - << "):\n" - << framework::paddle2cinn::ReadableProtoStr(compilation_key); + VLOG(4) << "CinnLaunchOp attribute(" << kCompilationKey << ") " + << "value:\n" + << CinnCompiler::GetInstance()->ReadableKey(compilation_key); const auto& graph = CinnCompiler::GetInstance()->FindGraph(compilation_key); auto input_variable_names = ctx.InputNames(kX); From 2d878628171fae57d7940b29f8f5545bdfd3caba Mon Sep 17 00:00:00 2001 From: Wang Zhen Date: Wed, 3 Nov 2021 07:03:32 +0000 Subject: [PATCH 
11/14] Update VizGraph in CinnCompiler. --- paddle/fluid/framework/paddle2cinn/cinn_compiler.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc index 368c83b1039c8..77ed969cc7d86 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc @@ -141,8 +141,8 @@ std::string CinnCompiler::VizGraph(const std::string& key) const { [](const auto& val) { return std::to_string(val); }); label += "\n" + string::join_strings(shape_str, ','); } - dot.AddNode(node_id, - {Dot::Attr("shape", "ellipse"), Dot::Attr("style", "bold")}, + dot.AddNode(node_id, {Dot::Attr("shape", "box"), + Dot::Attr("style", "rounded,bold")}, label); } node2dot[n] = node_id; From 88bd65d3e20723af4a0fdd8b9838a98eeaa5cbae Mon Sep 17 00:00:00 2001 From: Wang Zhen Date: Wed, 3 Nov 2021 07:14:07 +0000 Subject: [PATCH 12/14] Update the UT test_parallel_executor_run_cinn.py. 
--- .../unittests/test_parallel_executor_run_cinn.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py index 8697883ac88a8..d630fb54ebd0d 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py @@ -40,9 +40,9 @@ def set_cinn_flag(val): def reader(limit): - for i in range(limit): - yield np.ones([1, 28]).astype('float32') * (i * 3.14 / (i + 1)), \ - np.array([i + 1]).astype('int64') + for _ in range(limit): + yield np.random.random([1, 28]).astype('float32'), \ + np.random.randint(0, 2, size=[1]).astype('int64') def rand_data(img, label, loop_num=10): @@ -62,7 +62,7 @@ def build_program(main_program, startup_program): shape=[1, 28], dtype="float32", attr=paddle.ParamAttr(initializer=paddle.nn.initializer.Assign( - np.ones([1, 28]).astype(np.float32)))) + np.random.rand(1, 28).astype(np.float32)))) label = paddle.static.data(name="label", shape=[1], dtype='int64') hidden = paddle.add(img, param) @@ -75,7 +75,11 @@ def build_program(main_program, startup_program): return img, label, avg_loss -def train(dot_save_dir, prefix): +def train(dot_save_dir, prefix, seed=1234): + np.random.seed(seed) + paddle.seed(seed) + if paddle.is_compiled_with_cuda(): + paddle.set_flags({'FLAGS_cudnn_deterministic': 1}) startup_program = paddle.static.Program() main_program = paddle.static.Program() img, label, loss = build_program(main_program, startup_program) From 53e8a37b0a61b86afeb36b4cdc3b38faecf546ca Mon Sep 17 00:00:00 2001 From: Wang Zhen Date: Wed, 3 Nov 2021 07:16:03 +0000 Subject: [PATCH 13/14] Improve the readability of the UT.
--- .../fluid/tests/unittests/test_parallel_executor_run_cinn.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py index d630fb54ebd0d..d9ae3cf5e757d 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py @@ -80,6 +80,7 @@ def train(dot_save_dir, prefix, seed=1234): paddle.seed(seed) if paddle.is_compiled_with_cuda(): paddle.set_flags({'FLAGS_cudnn_deterministic': 1}) + startup_program = paddle.static.Program() main_program = paddle.static.Program() img, label, loss = build_program(main_program, startup_program) From 9433f0bd9e589256d07488190b9f077b12f2cb2b Mon Sep 17 00:00:00 2001 From: Wang Zhen Date: Wed, 3 Nov 2021 07:45:23 +0000 Subject: [PATCH 14/14] Update the dot style of VizGraph in CinnCompiler. --- .../framework/paddle2cinn/cinn_compiler.cc | 18 ++++++++++++------ paddle/fluid/operators/cinn_launch_op.cc | 1 - 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc index 77ed969cc7d86..f9c28f4277690 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc @@ -129,9 +129,11 @@ std::string CinnCompiler::VizGraph(const std::string& key) const { for (const Node* n : graph.Nodes()) { std::string node_id = "Node" + std::to_string(id++); if (n->IsOp()) { - dot.AddNode(node_id, {Dot::Attr("shape", "box"), - Dot::Attr("style", "rounded,filled,bold")}, - n->Name()); + dot.AddNode( + node_id, + {Dot::Attr("shape", "box"), Dot::Attr("style", "rounded,filled,bold"), + Dot::Attr("color", "#303A3A"), Dot::Attr("fontcolor", "#ffffff")}, + n->Name()); } else if (n->IsVar()) { auto label = n->Name(); if (n->Var() && n->Var()->GetType() == 
proto::VarType::LOD_TENSOR) { @@ -141,9 +143,13 @@ std::string CinnCompiler::VizGraph(const std::string& key) const { [](const auto& val) { return std::to_string(val); }); label += "\n" + string::join_strings(shape_str, ','); } - dot.AddNode(node_id, {Dot::Attr("shape", "box"), - Dot::Attr("style", "rounded,bold")}, - label); + dot.AddNode( + node_id, + {Dot::Attr("shape", "box"), Dot::Attr("style", "rounded,filled,bold"), + Dot::Attr("color", n->Var()->IsParameter() ? "#148b97" : "#dddddd"), + Dot::Attr("fontcolor", + n->Var()->IsParameter() ? "#ffffff" : "#000000")}, + label); } node2dot[n] = node_id; } diff --git a/paddle/fluid/operators/cinn_launch_op.cc b/paddle/fluid/operators/cinn_launch_op.cc index b81ad11b06c1a..a17f1037318cb 100644 --- a/paddle/fluid/operators/cinn_launch_op.cc +++ b/paddle/fluid/operators/cinn_launch_op.cc @@ -13,7 +13,6 @@ // limitations under the License. #include "paddle/fluid/operators/cinn_launch_op.h" -#include "cinn/frontend/var_type_utils.h" #include "paddle/fluid/string/string_helper.h" namespace paddle {