[Dy2St][PIR] Hold backward program in GradNode #63694

Merged 25 commits on May 6, 2024

Changes from 11 commits
Commits (25)
3223492
add log
gouzil Apr 18, 2024
c1398d1
add log
gouzil Apr 18, 2024
2a7f67d
add log
gouzil Apr 19, 2024
733ac03
add handle
gouzil Apr 19, 2024
5c90eac
remove breakpoint
gouzil Apr 19, 2024
64a56b0
use `std::shared_ptr<::pir::Program>`
gouzil Apr 20, 2024
b48eaa0
Merge branch 'develop' of github.com:gouzil/Paddle into fix_backward_…
gouzil Apr 20, 2024
5534c70
Merge branch 'develop' of github.com:gouzil/Paddle into fix_backward_…
gouzil Apr 23, 2024
8aba5ef
copy #58180
gouzil Apr 24, 2024
23bbd58
clean log and open test
gouzil Apr 24, 2024
31be3b9
Rollback execution sequence
gouzil Apr 24, 2024
e005597
rm log
gouzil Apr 24, 2024
6277bc2
rm `class ProgramDesc`
gouzil Apr 24, 2024
4972090
rm `test[key]`
gouzil Apr 24, 2024
4bcbcc6
rm include
gouzil Apr 24, 2024
f5c6e0b
Merge branch 'develop' of github.com:gouzil/Paddle into fix_backward_…
gouzil Apr 25, 2024
414fbda
fix jit.load for pir
gouzil Apr 25, 2024
1e68e62
[ci][test] ignore test
gouzil Apr 25, 2024
cb6cdab
[ci][test] open `CompareOperantsTest`
gouzil Apr 25, 2024
a9946ee
[ci][test] ignore hook_utils
gouzil Apr 25, 2024
d83476b
[ci][test] ignore `dygraph_functions` and open `hook_utils`
gouzil Apr 25, 2024
0bf8a6a
Merge branch 'develop' of github.com:gouzil/Paddle into fix_backward_…
gouzil May 1, 2024
462cdf7
fix test_comp_eager
gouzil May 1, 2024
c7e9825
rollback `test_eager_prim.cc`
gouzil May 1, 2024
de20f12
fix
gouzil May 1, 2024
11 changes: 1 addition & 10 deletions paddle/fluid/eager/to_static/run_program_op_func.h
@@ -326,16 +326,7 @@ inline void pir_run_program_ad_func(

grad_node->SetStepScope(step_scope); // just for set useable.

// Set Grad out rank as same as fwd input and set stop gradient to bwd
// NOTE(@xiongkun): Not every tensor in x(list of tensor) is required
// gradient. for example: x[1] is not used for output, the x[1] is ignored.

std::vector<const paddle::Tensor*> x_require_grad;
for (size_t i = 0; i < x.size(); ++i) {
x_require_grad.push_back(&x[i]);
}

grad_node->SetGradOutMeta(x_require_grad, /*slot id*/ 0);
grad_node->SetGradOutMeta(x, /*slot id*/ 0);
grad_node->SetGradOutMeta(params, /*slot id*/ 1);

// TODO(@xiongkun): rewrite by new ir representation.
64 changes: 35 additions & 29 deletions paddle/fluid/eager/to_static/run_program_op_node.h
@@ -467,21 +467,18 @@ inline void PirRunProgramAPI(
auto param_values =
PADDLE_GET_CONST(std::vector<::pir::Value>, attrs.at("fp"));

auto *forward_global_block =
PADDLE_GET_CONST(::pir::Block *, attrs.at("forward_global_block"));
auto *backward_global_block =
PADDLE_GET_CONST(::pir::Block *, attrs.at("backward_global_block"));
auto forward_program = PADDLE_GET_CONST(std::shared_ptr<::pir::Program>,
attrs.at("forward_program"));
auto backward_program = PADDLE_GET_CONST(std::shared_ptr<::pir::Program>,
attrs.at("backward_program"));

auto *forward_program =
forward_global_block->GetParentOp()->GetParentProgram();
::pir::Block *forward_global_block = forward_program->block();

if (FLAGS_print_ir) {
std::ostringstream print_stream;
print_stream << "ForwardProgram is :\n";
forward_program->Print(print_stream);
if (!is_test) {
auto *backward_program =
backward_global_block->GetParentOp()->GetParentProgram();
print_stream << "BackwardProgram is:\n";
backward_program->Print(print_stream);
} else {
@@ -514,7 +511,7 @@ inline void PirRunProgramAPI(
forward_global_block, params, param_values, global_inner_scope);
// Step 2. create new interpretercore
auto passed_kernel_program =
paddle::framework::ApplyIrPass(forward_program, place);
paddle::framework::ApplyIrPass(forward_program.get(), place);
if (FLAGS_print_ir) {
std::ostringstream print_stream;
print_stream << "LoweredProgram( AfterPass ) is :\n";
@@ -1046,10 +1043,8 @@ inline void PirRunProgramGradAPI(

VLOG(4) << "global_inner_scope:" << global_inner_scope;

auto *backward_global_block =
PADDLE_GET_CONST(::pir::Block *, attrs.at("backward_global_block"));
auto *backward_program =
backward_global_block->GetParentOp()->GetParentProgram();
auto backward_program = PADDLE_GET_CONST(std::shared_ptr<::pir::Program>,
attrs.at("backward_program"));

auto output_grad_values =
PADDLE_GET_CONST(std::vector<::pir::Value>, attrs.at("bo_g"));
@@ -1069,26 +1064,32 @@
details::Trans2ContiguousTensorsInplace(out_grad);

// share x, param, middles, output_grads, out into scope.
details::ShareTensorsIntoScopeByValue(
    backward_global_block, out_grad, output_grad_values, global_inner_scope);
details::ShareTensorsIntoScopeByValue(
    backward_global_block, x, forward_input_values, global_inner_scope);
details::ShareTensorsIntoScopeByValue(backward_global_block,
                                      middles,
                                      forward_middle_values,
                                      global_inner_scope);
details::ShareTensorsIntoScopeByValue(
    backward_global_block, out, forward_output_values, global_inner_scope);
details::ShareTensorsIntoScopeByValue(
    backward_global_block, params, parameter_values, global_inner_scope);
details::ShareTensorsIntoScopeByValue(backward_program->block(),
                                      out_grad,
                                      output_grad_values,
                                      global_inner_scope);
details::ShareTensorsIntoScopeByValue(
    backward_program->block(), x, forward_input_values, global_inner_scope);
details::ShareTensorsIntoScopeByValue(backward_program->block(),
                                      middles,
                                      forward_middle_values,
                                      global_inner_scope);
details::ShareTensorsIntoScopeByValue(backward_program->block(),
                                      out,
                                      forward_output_values,
                                      global_inner_scope);
details::ShareTensorsIntoScopeByValue(
    backward_program->block(), params, parameter_values, global_inner_scope);

// Clear out and middles to avoid hold memory until backward finish.
out.clear();
middles.clear();
VLOG(1) << "out and middles clear end";

Member commented: Did you forget to clean this up?

auto &cache = paddle::framework::InterpreterCoreInfoCache::Instance();
std::shared_ptr<paddle::framework::InterpreterCore> interpreter_core =
nullptr;

if (!cache.Has(program_id,
global_inner_scope,
place_hash_key,
@@ -1101,7 +1102,7 @@
VLOG(2) << "No interpretercore cache, so create a new interpretercore";
// Step 1. share input_vars & parameters into scope
auto passed_kernel_program =
paddle::framework::ApplyIrPass(backward_program, place);
paddle::framework::ApplyIrPass(backward_program.get(), place);

const auto &new_block = passed_kernel_program->block();
passed_kernel_program = paddle::framework::ApplyRemoveShadowFeedPass(
@@ -1143,10 +1144,10 @@
// get all eager gc vars
std::set<std::string> skip_eager_delete_vars;
auto skip_names = details::GetNameFromValue(
backward_global_block, x_grad_values, false, true);
backward_program->block(), x_grad_values, false, true);
skip_eager_delete_vars.insert(skip_names.begin(), skip_names.end());
skip_names = details::GetNameFromValue(
backward_global_block, p_grad_values, false, true);
backward_program->block(), p_grad_values, false, true);
skip_eager_delete_vars.insert(skip_names.begin(), skip_names.end());
interpreter_core->SetSkipGcVars(skip_eager_delete_vars);
cache.UpdateSkipEagerDeleteVars(program_id,
@@ -1179,7 +1180,7 @@
}
}

if (!backward_global_block->empty()) {
if (!backward_program->block()->empty()) {
paddle::platform::RecordEvent record_event(
"interpreter_core_run",
paddle::platform::TracerEventType::UserDefined,
@@ -1194,9 +1195,11 @@
"fetch_and_gc", paddle::platform::TracerEventType::UserDefined, 1);
// Step 4. get outputs
details::ShareTensorsFromScopeByValue(
backward_global_block, x_grad, x_grad_values, global_inner_scope);
details::ShareTensorsFromScopeByValue(
backward_global_block, params_grad, p_grad_values, global_inner_scope);
backward_program->block(), x_grad, x_grad_values, global_inner_scope);
details::ShareTensorsFromScopeByValue(backward_program->block(),
params_grad,
p_grad_values,
global_inner_scope);
VLOG(4) << "after backward gc all vars";
global_inner_scope->SetCanReused(true);
details::GcScope(global_inner_scope);
@@ -1335,8 +1338,7 @@ class GradNodeRunProgram : public egr::GradNodeBase {
if (x[i].is_dense_tensor()) {
x_grad->emplace_back(std::make_shared<phi::DenseTensor>());
} else if (x[i].is_selected_rows()) {
auto selected_row = std::make_shared<phi::SelectedRows>();
x_grad->emplace_back(selected_row);
x_grad->emplace_back(std::make_shared<phi::SelectedRows>());
}
x_grad->back().set_name(x_grad_names[i]);
}
@@ -1471,6 +1473,10 @@ class PirGradNodeRunProgram : public egr::GradNodeBase {
VLOG(3) << "End Eager Backward Node: PirGradNodeRunProgram";

*executed_ = true;
egr::EagerUtils::FillZeroForEmptyOptionalGradOutput(&x_grad,
this->OutputMeta()[0]);
egr::EagerUtils::FillZeroForEmptyOptionalGradOutput(&params_grad,
this->OutputMeta()[1]);
return {x_grad, params_grad};
}

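The recurring pattern in this file is that the op attributes now hold the whole program rather than a raw block pointer: forward_program and backward_program are std::shared_ptr<::pir::Program> attributes, and each block is derived on demand via program->block(), so whoever holds the attributes (for example the grad node) shares ownership of the backward program. A condensed sketch of that retrieval pattern follows; the helper name is hypothetical and the includes are illustrative, not part of the PR.

#include <memory>

#include "paddle/fluid/framework/type_defs.h"   // paddle::framework::AttributeMap
#include "paddle/pir/include/core/program.h"    // ::pir::Program

// Hypothetical helper: pulls the backward program out of the attribute map
// the same way PirRunProgramGradAPI does, and derives its block from it.
// PADDLE_GET_CONST comes from Paddle's attribute utilities (header omitted here).
inline std::shared_ptr<::pir::Program> SketchGetBackwardProgram(
    const paddle::framework::AttributeMap &attrs) {
  auto backward_program = PADDLE_GET_CONST(std::shared_ptr<::pir::Program>,
                                           attrs.at("backward_program"));
  // The block no longer travels as a bare ::pir::Block*; it is always owned
  // by the program retrieved above, so it cannot outlive it.
  ::pir::Block *backward_block = backward_program->block();
  (void)backward_block;
  return backward_program;
}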
4 changes: 4 additions & 0 deletions paddle/fluid/framework/op_desc.cc
@@ -26,6 +26,7 @@ limitations under the License. */
#include "paddle/fluid/operators/ops_extra_info.h"
#include "paddle/phi/common/complex.h"
#include "paddle/pir/include/core/block.h"
#include "paddle/pir/include/core/program.h"
#include "paddle/pir/include/core/value.h"
#include "paddle/utils/blank.h"

@@ -977,6 +978,9 @@ struct SetAttrDescVisitor {
void operator()(const std::vector<pir::Block *> &v) const {
// just do nothing.
}
void operator()(const std::shared_ptr<pir::Program> &v) const {
// just do nothing.
}
void operator()(const std::vector<VarDesc *> &v) const {
std::vector<std::string> var_names;
for (auto var : v) {
3 changes: 2 additions & 1 deletion paddle/fluid/framework/type_defs.cc
@@ -39,7 +39,8 @@ template class variant<paddle::blank,
paddle::experimental::Scalar,
std::vector<paddle::experimental::Scalar>,
::pir::Block*,
std::vector<::pir::Value>>;
std::vector<::pir::Value>,
std::shared_ptr<::pir::Program>>;
} // namespace paddle
REGISTER_LOG_SIMPLY_STR(paddle::framework::AttributeMap);
REGISTER_LOG_SIMPLY_STR(paddle::framework::Attribute);
5 changes: 4 additions & 1 deletion paddle/fluid/framework/type_defs.h
@@ -27,6 +27,7 @@ limitations under the License. */
#include "paddle/phi/common/scalar.h"
#include "paddle/phi/core/infermeta_utils.h"
#include "paddle/pir/include/core/block.h"
#include "paddle/pir/include/core/program.h"
#include "paddle/pir/include/core/value.h"
#include "paddle/utils/blank.h"
#include "paddle/utils/small_vector.h"
@@ -40,6 +41,7 @@ class InferShapeContext;
class InferVarTypeContext;
class VarDesc;
class BlockDesc;
class ProgramDesc;

Member commented: Is this necessary? Only pir::Program is added below, so why forward-declare the legacy IR ProgramDesc here?

class Variable;
class InferNoNeedBufferVarsFN;

@@ -67,7 +69,8 @@ using Attribute = paddle::variant<paddle::blank,
paddle::experimental::Scalar,
std::vector<paddle::experimental::Scalar>,
::pir::Block*,
std::vector<::pir::Value>>;
std::vector<::pir::Value>,
std::shared_ptr<::pir::Program>>;
using AttributeMap = std::unordered_map<std::string, Attribute>;

using OpCreator =
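Taken together with the op_desc.cc hunk above, these changes follow from Attribute being a variant: adding std::shared_ptr<::pir::Program> as a new alternative means every exhaustive visitor over Attribute (such as SetAttrDescVisitor) needs a handler for it, even a no-op one. Below is a standalone sketch of that constraint using simplified stand-in types; it is not Paddle's real visitor.

#include <iostream>
#include <memory>
#include <string>
#include <variant>

struct Program {};  // stand-in for ::pir::Program

// Simplified analogue of paddle::framework::Attribute.
using Attribute = std::variant<int, std::string, std::shared_ptr<Program>>;

struct AttrVisitor {
  void operator()(int v) const { std::cout << "int: " << v << '\n'; }
  void operator()(const std::string &v) const { std::cout << "str: " << v << '\n'; }
  // Without this overload, the visit below stops compiling once the
  // shared_ptr<Program> alternative is added; this mirrors the no-op
  // operator() added to SetAttrDescVisitor.
  void operator()(const std::shared_ptr<Program> &) const { /* intentionally empty */ }
};

int main() {
  Attribute attr = std::make_shared<Program>();
  std::visit(AttrVisitor{}, attr);
  return 0;
}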
21 changes: 17 additions & 4 deletions paddle/fluid/pybind/op_function_common.cc
@@ -36,6 +36,7 @@
#include "paddle/phi/common/complex.h"
#include "paddle/pir/include/core/block.h"
#include "paddle/pir/include/core/op_result.h"
#include "paddle/pir/include/core/region.h"
#include "paddle/pir/include/core/value.h"

namespace paddle {
@@ -858,6 +859,17 @@ void CastPyArg2AttrIRBlock(PyObject* obj,
attrs[key] = reinterpret_cast<::pir::Block*&>(vh[0]);
}

void CastPyArg2AttrIRProgram(PyObject* obj,
paddle::framework::AttributeMap& attrs, // NOLINT
const std::string& key,
const std::string& op_type,
ssize_t arg_pos) {
VLOG(1) << "After Process pir::Program*";
const std::shared_ptr<::pir::Program> program =
::py::handle(obj).cast<std::shared_ptr<::pir::Program>>();
attrs[key] = program;
}

void CastPyArg2AttrValues(PyObject* obj,
paddle::framework::AttributeMap& attrs, // NOLINT
const std::string& key,
@@ -998,6 +1010,7 @@ void ConstructAttrMapForRunProgram(
attr_end));

PyObject* obj = nullptr;
attrs["testkey"] = std::string("testvalue");

Member commented: Forgot to remove this?

Member (author) replied: Done

for (ssize_t arg_pos = attr_start; arg_pos < attr_end; arg_pos += 2) {
VLOG(1) << "Start Process " << arg_pos;
Py_ssize_t key_len = 0;
Expand All @@ -1020,11 +1033,11 @@ void ConstructAttrMapForRunProgram(

if (std::set<std::string>({"cuda_graph_capture_mode"}).count(key)) {
CastPyArg2AttrString(obj, attrs, key, op_type, arg_pos);
} else if (std::set<std::string>({"global_block",
"forward_global_block",
"backward_global_block"})
.count(key)) {
} else if (std::set<std::string>({"global_block"}).count(key)) {
CastPyArg2AttrIRBlock(obj, attrs, key, op_type, arg_pos);
} else if (std::set<std::string>({"forward_program", "backward_program"})
.count(key)) {
CastPyArg2AttrIRProgram(obj, attrs, key, op_type, arg_pos);
} else if (std::set<std::string>({"is_test", "use_interpretorcore"})
.count(key)) {
CastPyArg2AttrBoolean(obj, attrs, key, op_type, arg_pos);
2 changes: 1 addition & 1 deletion paddle/fluid/pybind/pir.cc
@@ -255,7 +255,7 @@ void BindProgram(py::module *m) {
)DOC");
program
.def(py::init([]() {
return std::make_unique<Program>(pir::IrContext::Instance());
return std::make_shared<Program>(pir::IrContext::Instance());
}))
.def("__str__",
[](const std::shared_ptr<Program> &self) {
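The switch from make_unique to make_shared here works together with the new CastPyArg2AttrIRProgram above: assuming Program is bound with a std::shared_ptr holder (as BindProgram appears to do, given the shared_ptr parameter in its __str__ lambda), C++ can re-acquire shared ownership from a Python handle, which is what lets the grad node keep the backward program alive after Python-side references go away. A rough pybind11 sketch of that pattern with a stand-in type; this is not the actual binding code.

#include <memory>

#include <pybind11/pybind11.h>

namespace py = pybind11;

struct Program {};  // stand-in for pir::Program

PYBIND11_MODULE(sketch_pir, m) {
  // shared_ptr holder: Python objects and C++ callers share ownership.
  py::class_<Program, std::shared_ptr<Program>>(m, "Program")
      .def(py::init([]() { return std::make_shared<Program>(); }));

  // Analogue of CastPyArg2AttrIRProgram: the cast yields a shared_ptr that
  // keeps the Program alive even if the Python reference is dropped.
  m.def("hold", [](py::handle obj) {
    auto program = obj.cast<std::shared_ptr<Program>>();
    return static_cast<int>(program.use_count());
  });
}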
8 changes: 4 additions & 4 deletions python/paddle/jit/dy2static/pir_partial_program.py
@@ -899,10 +899,10 @@ def _prune_unused_params(self, program):

def _prepare_attributes(self):
attrs = [
'forward_global_block',
self.program.forward_program.global_block(),
'backward_global_block',
self.program.backward_program.global_block(),
'forward_program',
self.program.forward_program,
'backward_program',
self.program.backward_program,
'is_test',
not self.training,
'program_id',
3 changes: 2 additions & 1 deletion test/dygraph_to_static/test_no_gradient.py
@@ -15,7 +15,7 @@
import unittest

import numpy
from dygraph_to_static_utils import Dy2StTestBase
from dygraph_to_static_utils import Dy2StTestBase, test_legacy_and_pt_and_pir

import paddle

Expand All @@ -33,6 +33,7 @@ def main_func(x, index):


class TestNoGradientCase(Dy2StTestBase):
@test_legacy_and_pt_and_pir
def test_no_gradient(self):
paddle.disable_static()
x = paddle.randn([10, 3])