
Commit

Merge branch 'develop' into nearest_bf16
jakpiase authored Feb 21, 2022
2 parents 2949758 + 9c51eee commit fe73d3b
Showing 1,936 changed files with 16,822 additions and 15,735 deletions.
18 changes: 9 additions & 9 deletions .gitignore
@@ -5,14 +5,14 @@ paddle/fluid/API_PR.spec
paddle/fluid/eager/api/generated/*
paddle/fluid/op_use_default_grad_maker_DEV.spec
paddle/fluid/op_use_default_grad_maker_PR.spec
paddle/pten/api/backward/backward_api.h
paddle/pten/api/include/api.h
paddle/pten/api/lib/api.cc
paddle/pten/api/lib/dygraph_api.*
paddle/pten/api/lib/backward_api.cc
paddle/pten/extension.h
paddle/pten/include/*
paddle/pten/infermeta/generated.*
paddle/phi/api/backward/backward_api.h
paddle/phi/api/include/api.h
paddle/phi/api/lib/api.cc
paddle/phi/api/lib/dygraph_api.*
paddle/phi/api/lib/backward_api.cc
paddle/phi/extension.h
paddle/phi/include/*
paddle/phi/infermeta/generated.*

*.DS_Store
*.vs
@@ -52,6 +52,6 @@ paddle/infrt/dialect/pd_ops_info.h
.lit_test_times.txt
paddle/infrt/tests/dialect/Output
paddle/infrt/tests/lit.cfg.py
paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launchers.cc
paddle/infrt/kernel/phi/infershaped/infershaped_kernel_launchers.cc
paddle/fluid/pybind/eager_final_state_op_function_impl.h
paddle/fluid/pybind/tmp_eager_final_state_op_function_impl.h
2 changes: 1 addition & 1 deletion cmake/generic.cmake
@@ -122,7 +122,7 @@ set_property(GLOBAL PROPERTY PTEN_MODULES "")
function(find_pten_modules TARGET_NAME)
get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE)
string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path})
string(FIND "${__target_path}" "pten" pos)
string(FIND "${__target_path}" "phi" pos)
if(pos GREATER 1)
get_property(pten_modules GLOBAL PROPERTY PTEN_MODULES)
set(pten_modules ${pten_modules} ${TARGET_NAME})
16 changes: 8 additions & 8 deletions cmake/inference_lib.cmake
@@ -226,17 +226,17 @@ include_directories(${CMAKE_BINARY_DIR}/../paddle/fluid/framework/io)

# copy api headers for pten & custom op
copy(inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/pten/api/ext/*.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/pten/api/ext/)
SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/api/ext/*.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/api/ext/)
copy(inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/pten/api/include/*.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/pten/api/include/)
SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/api/include/*.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/api/include/)
copy(inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/pten/api/all.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/pten/api/)
SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/api/all.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/api/)
copy(inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/pten/common/*.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/pten/common/)
SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/common/*.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/common/)
copy(inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/any.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/utils/)
12 changes: 6 additions & 6 deletions cmake/pten.cmake
@@ -33,7 +33,7 @@ function(generate_unify_header DIR_NAME)

# generate target header file
set(header_file ${CMAKE_CURRENT_SOURCE_DIR}/include/${header_name}.h)
file(WRITE ${header_file} "// Header file generated by paddle/pten/CMakeLists.txt for external users,\n// DO NOT edit or include it within paddle.\n\n#pragma once\n\n")
file(WRITE ${header_file} "// Header file generated by paddle/phi/CMakeLists.txt for external users,\n// DO NOT edit or include it within paddle.\n\n#pragma once\n\n")

# get all top-level headers and write into header file
file(GLOB HEADERS "${CMAKE_CURRENT_SOURCE_DIR}\/${DIR_NAME}\/*.h")
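For illustration (not part of this diff), the unify header generated by this function is expected to look roughly like the sketch below: the banner text comes from the file(WRITE ...) call above, and the include list is assumed to hold one line per header found by the GLOB. The two included paths are examples, not the actual generated list.

    // Header file generated by paddle/phi/CMakeLists.txt for external users,
    // DO NOT edit or include it within paddle.

    #pragma once

    // assumed: one include per top-level header matched by the GLOB above
    #include "paddle/phi/common/place.h"
    #include "paddle/phi/common/scalar.h"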
@@ -137,17 +137,17 @@ function(kernel_library TARGET)
list(APPEND all_srcs ${xpu_srcs})
foreach(src ${all_srcs})
file(READ ${src} target_content)
string(REGEX MATCHALL "#include \"paddle\/pten\/kernels\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content})
string(REGEX MATCHALL "#include \"paddle\/phi\/kernels\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content})
if ("${kernel_library_SUB_DIR}" STREQUAL "")
string(REGEX MATCHALL "#include \"paddle\/pten\/kernels\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content})
string(REGEX MATCHALL "#include \"paddle\/phi\/kernels\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content})
else()
string(REGEX MATCHALL "#include \"paddle\/pten\/kernels\/${kernel_library_SUB_DIR}\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content})
string(REGEX MATCHALL "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content})
endif()
foreach(include_kernel ${include_kernels})
if ("${kernel_library_SUB_DIR}" STREQUAL "")
string(REGEX REPLACE "#include \"paddle\/pten\/kernels\/" "" kernel_name ${include_kernel})
string(REGEX REPLACE "#include \"paddle\/phi\/kernels\/" "" kernel_name ${include_kernel})
else()
string(REGEX REPLACE "#include \"paddle\/pten\/kernels\/${kernel_library_SUB_DIR}\/" "" kernel_name ${include_kernel})
string(REGEX REPLACE "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/" "" kernel_name ${include_kernel})
endif()
string(REGEX REPLACE ".h\"" "" kernel_name ${kernel_name})
list(APPEND kernel_deps ${kernel_name})
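For illustration (not part of this diff), this scan turns kernel-header includes into dependency names; the file and kernel names in the sketch below are hypothetical examples.

    // Hypothetical phi kernel source scanned by kernel_library():
    // the REGEX MATCHALL above captures the include line, and the two
    // REGEX REPLACE calls strip the "paddle/phi/kernels/" prefix and the
    // ".h" suffix, leaving "scale_kernel" to append to kernel_deps.
    #include "paddle/phi/kernels/scale_kernel.h"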
10 changes: 5 additions & 5 deletions cmake/pten_header.cmake
@@ -22,7 +22,7 @@ if (pos GREATER 1)
foreach(header ${HEADERS})
if (${header} MATCHES ".*.h$")
file(READ ${header} HEADER_CONTENT)
string(REPLACE "paddle/pten/" "paddle/include/experimental/pten/" HEADER_CONTENT "${HEADER_CONTENT}")
string(REPLACE "paddle/phi/" "paddle/include/experimental/phi/" HEADER_CONTENT "${HEADER_CONTENT}")
string(REPLACE "paddle/utils/" "paddle/include/experimental/utils/" HEADER_CONTENT "${HEADER_CONTENT}")
file(WRITE ${header} "${HEADER_CONTENT}")
message(STATUS "pten header path compat processing complete: ${header}")
@@ -32,10 +32,10 @@ endif()
endfunction()

pten_header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental)
pten_header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/pten/api)
pten_header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/pten/api/ext)
pten_header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/pten/api/include)
pten_header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/pten/common)
pten_header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/api)
pten_header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/api/ext)
pten_header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/api/include)
pten_header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/common)
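For illustration (not part of this diff), the string(REPLACE ...) above rewrites include paths inside the installed headers; the header name in the sketch is an assumed example.

    // Before path-compat processing (path as it appears in the source tree):
    #include "paddle/phi/api/include/tensor.h"
    // After path-compat processing (path as installed for inference users):
    #include "paddle/include/experimental/phi/api/include/tensor.h"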

# In order to be compatible with the original behavior, the header file name needs to be changed
file(RENAME ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/extension.h
2 changes: 1 addition & 1 deletion paddle/CMakeLists.txt
@@ -2,6 +2,6 @@ add_subdirectory(utils)
add_subdirectory(scripts)
add_subdirectory(testing)
set(PYTHON_TESTS_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/tests CACHE INTERNAL "python tests directory")
add_subdirectory(pten)
add_subdirectory(phi)
add_subdirectory(infrt)
add_subdirectory(fluid)
2 changes: 1 addition & 1 deletion paddle/extension.h
@@ -15,4 +15,4 @@ limitations under the License. */
#pragma once

// All paddle apis in C++ frontend
#include "paddle/pten/api/all.h"
#include "paddle/phi/api/all.h"
2 changes: 1 addition & 1 deletion paddle/fluid/distributed/common/sparse_sharding_merge.h
@@ -28,7 +28,7 @@
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/string/split.h"
#include "paddle/pten/core/utils/dim.h"
#include "paddle/phi/core/utils/dim.h"

constexpr int FG = 256 * 1024 * 1024;
constexpr int Q_SIZE = 10000;
6 changes: 3 additions & 3 deletions paddle/fluid/distributed/common/utils.h
@@ -25,15 +25,15 @@
#include <vector>

#include "paddle/fluid/platform/device_context.h"
#include "paddle/pten/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"

namespace paddle {
namespace distributed {

template <typename T>
inline pten::funcs::BlasT<paddle::platform::CPUDeviceContext, T> GetBlas() {
inline phi::funcs::BlasT<paddle::platform::CPUDeviceContext, T> GetBlas() {
paddle::platform::CPUDeviceContext cpu_ctx;
return pten::funcs::GetBlas<paddle::platform::CPUDeviceContext, T>(cpu_ctx);
return phi::funcs::GetBlas<paddle::platform::CPUDeviceContext, T>(cpu_ctx);
}
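A minimal usage sketch for this helper (not part of the diff; the buffer names are assumptions, and the VADD routine shown is taken from how the blas object is used elsewhere in this commit):

    // Fragment: obtain a CPU Blas wrapper and add two float buffers.
    const int n = 4;
    std::vector<float> a(n, 1.0f), b(n, 2.0f), c(n, 0.0f);
    auto blas = GetBlas<float>();                // helper defined above
    blas.VADD(n, a.data(), b.data(), c.data());  // c[i] = a[i] + b[i]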

template <typename T>
4 changes: 2 additions & 2 deletions paddle/fluid/distributed/fleet_executor/dist_model.cc
@@ -44,7 +44,7 @@ bool LoadDataFromDistModelTensor(const DistModelTensor &input_data,
framework::LoDTensor *input_tensor,
const platform::Place &place) {
VLOG(3) << "Loading data from DistModelTensor for " << input_data.name;
framework::DDim dims = pten::make_ddim(input_data.shape);
framework::DDim dims = phi::make_ddim(input_data.shape);
void *input_tensor_ptr;
if (input_data.dtype == DistModelDataType::INT64) {
input_tensor_ptr = input_tensor->mutable_data<int64_t>(dims, place);
@@ -518,7 +518,7 @@ bool DistModel::FetchResults(std::vector<DistModelTensor> *output_data,
template <typename T>
bool DistModel::FetchResult(const framework::LoDTensor &fetch,
DistModelTensor *output_data) {
auto shape = pten::vectorize(fetch.dims());
auto shape = phi::vectorize(fetch.dims());
output_data->shape.assign(shape.begin(), shape.end());
const T *data = fetch.data<T>();
int64_t num_elems = fetch.numel();
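For context (not part of the diff), phi::vectorize and phi::make_ddim convert between a DDim and a plain std::vector<int64_t>; a small round-trip sketch with illustrative shape values:

    framework::DDim dims = phi::make_ddim({2, 3, 4});    // build a DDim
    std::vector<int64_t> shape = phi::vectorize(dims);   // back to {2, 3, 4}
    // shape can then be copied into output_data->shape as above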
@@ -34,7 +34,7 @@ namespace distributed {
std::vector<framework::OperatorBase*> GetOps() {
framework::AttributeMap attrs;
attrs["dtype"] = framework::proto::VarType::FP32;
attrs["shape"] = pten::vectorize<int>({2, 3});
attrs["shape"] = phi::vectorize<int>({2, 3});
attrs["value"] = 1.0f;

auto zero_op = framework::OpRegistry::CreateOp("fill_constant", {},
2 changes: 1 addition & 1 deletion paddle/fluid/distributed/ps/service/brpc_ps_client.cc
@@ -1227,7 +1227,7 @@ int32_t BrpcPsClient::recv_and_save_table(const uint64_t table_id,
framework::LoDTensor *var_tensor = var->GetMutable<framework::LoDTensor>();

std::vector<int64_t> vec_dim = {var_num, var_shape};
var_tensor->Resize(pten::make_ddim(vec_dim));
var_tensor->Resize(phi::make_ddim(vec_dim));

// copy and save
float *tensor_data = var_tensor->mutable_data<float>(place);
18 changes: 9 additions & 9 deletions paddle/fluid/distributed/ps/service/brpc_utils.cc
@@ -26,9 +26,9 @@ class Variable;
} // namespace framework
} // namespace paddle

namespace pten {
namespace phi {
class DenseTensor;
} // namespace pten
} // namespace phi

namespace paddle {
namespace distributed {
@@ -79,7 +79,7 @@ void SerializeToMultiVarMsgAndIOBuf(

if (var->IsType<framework::LoDTensor>()) {
SerializeLodTensor(var, ctx, send_var_msg, &temp_iobuf);
} else if (var->IsType<pten::SelectedRows>()) {
} else if (var->IsType<phi::SelectedRows>()) {
SerializeSelectedRows(var, ctx, send_var_msg, &temp_iobuf);
}
iobuf->append(temp_iobuf);
@@ -103,7 +103,7 @@ void SerializeLodTensor(framework::Variable* var,
}
var_msg->set_data_type(static_cast<VarMsg::Type>(
framework::TransToProtoVarType(tensor->dtype())));
for (auto& dim : pten::vectorize(tensor->dims())) {
for (auto& dim : phi::vectorize(tensor->dims())) {
var_msg->add_dims(dim);
}
// IO Buffer
@@ -134,7 +134,7 @@ void SerializeLodTensor(framework::Variable* var,
void SerializeSelectedRows(framework::Variable* var,
const platform::DeviceContext& ctx, VarMsg* var_msg,
butil::IOBuf* iobuf) {
pten::SelectedRows* slr = var->GetMutable<pten::SelectedRows>();
phi::SelectedRows* slr = var->GetMutable<phi::SelectedRows>();
auto* tensor = slr->mutable_value();
auto* rows = slr->mutable_rows();

@@ -148,7 +148,7 @@ void SerializeSelectedRows(framework::Variable* var,
memcpy(data_ptr, &((*rows)[0]), rows->size() * sizeof(int64_t));
var_msg->set_data_type(static_cast<VarMsg::Type>(
framework::TransToProtoVarType(tensor->dtype())));
for (auto& dim : pten::vectorize(tensor->dims())) {
for (auto& dim : phi::vectorize(tensor->dims())) {
var_msg->add_dims(dim);
}
// IO Buffer
@@ -224,7 +224,7 @@ void DeserializeLodTensor(framework::Variable* var, const VarMsg& msg,
for (auto& x : msg.dims()) {
vec_dim.push_back(x);
}
tensor->Resize(pten::make_ddim(vec_dim));
tensor->Resize(phi::make_ddim(vec_dim));

framework::LoD lod;
for (int i = 0; i < msg.lod_level(); ++i) {
@@ -268,7 +268,7 @@ void DeserializeSelectedRows(
butil::IOBufBytesIterator& io_buffer_itr, // NOLINT
const platform::DeviceContext& ctx) {
const auto place = ctx.GetPlace();
auto* slr = var->GetMutable<pten::SelectedRows>();
auto* slr = var->GetMutable<phi::SelectedRows>();
framework::Tensor* tensor = slr->mutable_value();
slr->set_height(msg.slr_height());
std::vector<int64_t> tmp_rows(msg.dims()[0]);
@@ -278,7 +278,7 @@
for (auto& x : msg.dims()) {
vec_dim.push_back(x);
}
tensor->Resize(pten::make_ddim(vec_dim));
tensor->Resize(phi::make_ddim(vec_dim));
void* tensor_data = tensor->mutable_data(
place,
framework::TransToPtenDataType(VarMessageToVarType(msg.data_type())));
2 changes: 1 addition & 1 deletion paddle/fluid/distributed/ps/service/brpc_utils.h
@@ -27,7 +27,7 @@ limitations under the License. */
#include "paddle/fluid/framework/selected_rows_utils.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/pten/backends/dynload/port.h"
#include "paddle/phi/backends/dynload/port.h"

namespace butil {
class IOBuf;
22 changes: 10 additions & 12 deletions paddle/fluid/distributed/ps/service/communicator/communicator.cc
@@ -28,7 +28,7 @@ namespace paddle {
namespace distributed {

using framework::LoDTensor;
using pten::SelectedRows;
using phi::SelectedRows;

const uint32_t MAX_FEASIGN_NUM = 1024 * 100 * 100;

Expand Down Expand Up @@ -303,7 +303,7 @@ void Communicator::RpcSendSparse(const std::string &var_name, int table_id,
std::vector<float *> push_g_vec;

auto *send_var = scope.FindVar(var_name);
auto *tensor = send_var->GetMutable<pten::SelectedRows>();
auto *tensor = send_var->GetMutable<phi::SelectedRows>();
auto dim = tensor->value().dims()[1];
std::transform(tensor->rows().begin(), tensor->rows().end(),
std::back_inserter(sparse_push_keys),
@@ -866,7 +866,7 @@ bool AsyncCommunicator::Check(const std::vector<std::string> &var_tables) {
VLOG(3) << "send step_counter into queue";
auto tmp_var = std::make_shared<Variable>();
auto *tensor = tmp_var->GetMutable<framework::LoDTensor>();
tensor->Resize(pten::make_ddim({1}));
tensor->Resize(phi::make_ddim({1}));
auto *out_d = tensor->mutable_data<int64_t>(platform::CPUPlace());
out_d[0] = 1;
send_varname_to_queue_[table_name]->Push(tmp_var);
@@ -1027,10 +1027,10 @@ void GeoCommunicator::Send(const std::vector<std::string> &var_names,

auto *var = scope.FindVar(table_name);

PADDLE_ENFORCE_EQ(var->IsType<pten::SelectedRows>(), true,
PADDLE_ENFORCE_EQ(var->IsType<phi::SelectedRows>(), true,
platform::errors::InvalidArgument(
"Only need to send Sparse Grad in Geo mode."));
auto &rows = var->Get<pten::SelectedRows>().rows();
auto &rows = var->Get<phi::SelectedRows>().rows();

// insert ids which has not been record
for (size_t j = 0; j < rows.size(); j++) {
@@ -1177,8 +1177,7 @@ void GeoCommunicator::SendDense(const CommContext &send_ctx) {
auto *t_delta = var_delta->GetMutable<framework::LoDTensor>();
t_delta->mutable_data<float>(t_latest.dims(), cpu_ctx.GetPlace());

auto blas =
pten::funcs::GetBlas<platform::CPUDeviceContext, float>(cpu_ctx);
auto blas = phi::funcs::GetBlas<platform::CPUDeviceContext, float>(cpu_ctx);
blas.VSUB(t_latest.numel(), t_latest.data<float>(),
t_timestamp->data<float>(), t_delta->data<float>());
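In effect, SendDense pushes only the change since the last synchronization, and RecvDense below applies the inverse step (subtract the old snapshot from the pserver copy, then fold the result into the latest tensor with VADD). A condensed sketch of the send-side delta, not part of the diff, with illustrative buffers:

    // delta[i] = latest[i] - timestamp[i], via the CPU Blas helper
    const int n = 3;
    std::vector<float> latest{1.f, 2.f, 3.f}, timestamp{0.5f, 1.f, 1.5f}, delta(n);
    paddle::platform::CPUDeviceContext cpu_ctx;
    auto blas = phi::funcs::GetBlas<paddle::platform::CPUDeviceContext, float>(cpu_ctx);
    blas.VSUB(n, latest.data(), timestamp.data(), delta.data());  // {0.5, 1.0, 1.5}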

@@ -1218,8 +1217,7 @@ void GeoCommunicator::RecvDense(const CommContext &send_ctx) {
auto *t_delta = var_delta->GetMutable<framework::LoDTensor>();
t_delta->mutable_data<float>(t_latest->dims(), cpu_ctx.GetPlace());

auto blas =
pten::funcs::GetBlas<platform::CPUDeviceContext, float>(cpu_ctx);
auto blas = phi::funcs::GetBlas<platform::CPUDeviceContext, float>(cpu_ctx);
blas.VSUB(t_latest->numel(), t_pserver.data<float>(), t_old->data<float>(),
t_delta->data<float>());
blas.VADD(t_latest->numel(), t_latest->data<float>(),
@@ -1316,15 +1314,15 @@ void GeoCommunicator::SendSparse(const std::string &varname,
paddle::platform::CPUDeviceContext cpu_ctx;

auto *var_delta = delta_scope_->Var(varname);
auto *t_delta = var_delta->GetMutable<pten::SelectedRows>();
auto *t_delta = var_delta->GetMutable<phi::SelectedRows>();
auto *var_t_value = t_delta->mutable_value();
var_t_value->Resize({static_cast<int64_t>(sparse_ids.size()), dims1});
auto *t_value = var_t_value->mutable_data<float>(cpu_ctx.GetPlace());

t_delta->set_rows(sparse_ids);
t_delta->set_height(t_latest.dims()[0]);

auto blas = pten::funcs::GetBlas<platform::CPUDeviceContext, float>(cpu_ctx);
auto blas = phi::funcs::GetBlas<platform::CPUDeviceContext, float>(cpu_ctx);
float coefficient = 1.0 / static_cast<float>(trainers_);

std::vector<float *> push_g_vec;
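For context (not part of the diff), phi::SelectedRows pairs a row-index list with a dense value tensor; a minimal construction sketch with assumed sizes and names:

    // Illustrative: two rows of width 3 taken from a height-10 table.
    phi::SelectedRows slr;
    slr.set_height(10);
    slr.set_rows(std::vector<int64_t>{1, 7});
    auto *value = slr.mutable_value();
    value->Resize(phi::make_ddim({2, 3}));
    paddle::platform::CPUDeviceContext cpu_ctx;
    float *data = value->mutable_data<float>(cpu_ctx.GetPlace());
    // data now points at 2 x 3 floats to be filled with the rows to send.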
@@ -1392,7 +1390,7 @@ void GeoCommunicator::RecvSparse(const std::string &varname, int table_id,
v_delta.resize(numel);

paddle::platform::CPUDeviceContext cpu_ctx;
auto blas = pten::funcs::GetBlas<platform::CPUDeviceContext, float>(cpu_ctx);
auto blas = phi::funcs::GetBlas<platform::CPUDeviceContext, float>(cpu_ctx);

for (auto j = 0; j < static_cast<int>(keys.size()); ++j) {
VLOG(5) << "DEBUG GeoCommunicator::RecvSparse recv sparse key" << keys[j]

1 comment on commit fe73d3b

@paddle-bot-old

Congratulations! Your pull request passed all required CI. You can ask the reviewer(s) to approve and merge. 🎉
