fix single card 8 unittests in new executor #37957

Merged
@@ -69,11 +69,15 @@ void InterpreterCoreGarbageCollector::Add(paddle::framework::Variable* var,
} else if (var->IsType<
operators::reader::
OrderedMultiDeviceLoDTensorBlockingQueueHolder>()) {
// var->Clear(); // TODO(xiongkun03): can we clear directly? Why must we
// use the Add interface?
// TODO(xiongkun03): in the old executor, this type of variable does not
// support eager deletion, so we just leave it here for now.
} else if (var->IsType<LoDRankTable>()) {
// TODO(xiongkun03): in the old executor, this type of variable does not
// support eager deletion, so we just leave it here for now.
} else if (var->IsType<SelectedRows>()) {
Add(var->GetMutable<SelectedRows>()->mutable_value()->MoveMemoryHolder(),
event, ctx);
var->GetMutable<SelectedRows>()->mutable_rows()->clear();
} else if (var->IsType<LoDTensorArray>()) {
auto* tensor_arr = var->GetMutable<LoDTensorArray>();
for (auto& t : *tensor_arr) {
paddle/fluid/pybind/pybind.cc: 12 additions & 0 deletions
@@ -1419,6 +1419,18 @@ All parameter, weight, gradient are variables in Paddle.
out (core.Variable|None): the found variable or None.
)DOC",
py::return_value_policy::reference)
.def("erase", &Scope::EraseVars, py::arg("names"),
R"DOC(
Erase the variables with the given names from the current scope.

Args:
    names (list[str]): names of the variables to be erased.

Returns:
None
)DOC",
py::return_value_policy::reference)
.def("new_scope", [](Scope &self) -> Scope * { return &self.NewScope(); },
R"DOC(
Create a new sub-scope of the current scope.
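A minimal usage sketch of the new binding, assuming it is reached through `paddle.static.global_scope()`; the variable name below is illustrative only:

import paddle

paddle.enable_static()

scope = paddle.static.global_scope()
scope.var("act_out")  # create a variable in the scope
assert scope.find_var("act_out") is not None

# The new binding takes a list of names and removes them from the scope.
scope.erase(["act_out"])
assert scope.find_var("act_out") is None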
@@ -536,9 +536,12 @@ def _reset_activation_persistable(self):
'''
Reset activations to be not persistable.
'''
to_erase = []
for var in self._program.list_vars():
    if var.name in self._quantized_act_var_name:
        var.persistable = False
        to_erase.append(var.name)
self._scope.erase(to_erase)

def _sampling(self):
'''
python/paddle/fluid/executor.py: 11 additions & 1 deletion

@@ -401,7 +401,17 @@ def _is_enable_standalone_executor():


def _get_strong_program_cache_key(program, feed, fetch_list):
-    return str(id(program)) + _get_program_cache_key(feed, fetch_list)
+    # NOTE(xiongkun): id(program) may be duplicated, because CPython can
+    # reuse the id of a freed object, so variable names are added to the
+    # cache key as well.
+    def _get_varname_from_block(block):
+        block_str = []
+        for var_name in list(block.vars.keys()):
+            block_str.append(var_name)
+        return "\n".join(block_str)
+
+    inner_program = program._program if isinstance(
+        program, compiler.CompiledProgram) else program
+    return _get_varname_from_block(inner_program.blocks[0]) + str(id(
+        program)) + _get_program_cache_key(feed, fetch_list)


def _get_program_cache_key(feed, fetch_list):
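A short illustration of why `id(program)` alone is a weak cache key; this is plain CPython behavior, no Paddle needed, and the `Program` class below is a hypothetical stand-in:

class Program:  # stand-in for a fluid Program, for illustration only
    pass

a = Program()
old_id = id(a)
del a          # the program is freed ...
b = Program()  # ... and CPython may place the new object at the same address
print(id(b) == old_id)  # frequently True, so id() alone can collide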
@@ -316,7 +316,7 @@ def make_all_inputs(input_fields):
# how many batches we use
batch_num = 5

-np.random.seed = 90
+np.random.seed(90)
src_word_np = np.arange(1, TrainTaskConfig.batch_size * seq_len + 1).reshape(
[TrainTaskConfig.batch_size, seq_len]).astype('int64')
src_pos_np = np.random.randint(
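The `np.random.seed = 90` line above rebound the attribute to an int instead of calling the function, so the generator was never actually seeded. A small sketch of why the call form matters:

import numpy as np

np.random.seed(90)
a = np.random.randint(0, 10, size=3)

np.random.seed(90)
b = np.random.randint(0, 10, size=3)
print((a == b).all())  # True: reseeding makes the draws reproducible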
@@ -951,6 +951,8 @@ def transformer_sort_gradient_float32(self, is_sparse):

with guard():
fluid.set_flags({'FLAGS_sort_sum_gradient': True})
# NOTE(xiongkun03): In the new executor, the inplace strategy is on by
# default, which makes the result of the sum op differ slightly, so we
# disable inplace here.
fluid.set_flags({'FLAGS_new_executor_use_inplace': False})
paddle.seed(seed)
paddle.framework.random._manual_program_seed(seed)
transformer = TransFormer(
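For context on the NOTE above: the mechanism is assumed to be accumulation order, since floating-point addition is not associative, a sum that reuses buffers inplace can accumulate in a different order and differ at the last bit while staying numerically close. A plain NumPy sketch, illustrative only:

import numpy as np

xs = np.random.RandomState(0).randn(10000).astype('float32')
fwd = xs.sum()        # one accumulation order
rev = xs[::-1].sum()  # the reversed order
print(fwd == rev)            # often False: last-bit differences
print(np.isclose(fwd, rev))  # True: the results are still very close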