From 5240ac0c5aa9c5118584301f0a6d992c3d319170 Mon Sep 17 00:00:00 2001
From: Chen Weihang
Date: Tue, 26 Oct 2021 08:46:16 +0800
Subject: [PATCH] Dev/op2func refactor 3 (#30)

* add a candidate dense tensor class, test=develop
* remove TensorBase::backend(), test=develop
* remove some ops, test=develop
* cherry-pick the pr of tensor meta, test=develop
* moves the dense tensor and some ops, test=develop
* update the linalg operator, test=develop
* update other operators, test=develop
* fix errors, test=develop
* fix bugs, test=develop
* try to resolve the problem of windows ci, test=develop
* updates codes, test=develop
* fix the tensor_utils.cc, test=develop
* modify the dense tensor, test=develop
* fix the data type, test=develop

Co-authored-by: shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com>
---
 paddle/fluid/framework/CMakeLists.txt | 8 +-
 paddle/fluid/framework/operator.cc | 12 +-
 paddle/fluid/framework/pten_utils.cc | 142 ----------
 paddle/fluid/framework/pten_utils.h | 30 +-
 paddle/fluid/framework/pten_utils_test.cc | 60 ----
 paddle/fluid/imperative/prepared_operator.cc | 14 +-
 paddle/fluid/operators/CMakeLists.txt | 5 +-
 paddle/fluid/operators/dot_op.h | 11 +-
 paddle/fluid/operators/fill_any_like_op.h | 6 +-
 paddle/fluid/operators/mean_op.cu | 1 +
 paddle/fluid/operators/mean_op.h | 7 +-
 paddle/fluid/operators/scale_op.h | 8 +-
 paddle/fluid/operators/sign_op.h | 6 +-
 paddle/pten/common/data_type.h | 16 +-
 paddle/pten/core/CMakeLists.txt | 10 +-
 paddle/pten/core/candidate/CMakeLists.txt | 1 -
 paddle/pten/core/candidate/dense_tensor.cc | 145 ----------
 paddle/pten/core/candidate/dense_tensor.h | 188 -------------
 paddle/pten/core/dense_tensor.cc | 190 +++++++------
 paddle/pten/core/dense_tensor.h | 256 ++++++++++--------
 paddle/pten/core/tensor_base.h | 2 -
 paddle/pten/core/tensor_meta.h | 152 ++++-------
 paddle/pten/hapi/CMakeLists.txt | 2 +-
 paddle/pten/hapi/lib/creation.cc | 9 +-
 paddle/pten/hapi/lib/linalg.cc | 6 +-
 paddle/pten/hapi/lib/manipulation.cc | 6 +-
 paddle/pten/hapi/lib/math.cc | 7 +-
 paddle/pten/hapi/lib/utils/CMakeLists.txt | 3 +-
 paddle/pten/hapi/lib/utils/tensor_utils.cc | 110 +++++++-
 paddle/pten/hapi/lib/utils/tensor_utils.h | 58 +---
 .../hapi/lib/utils/tests/test_tensor_utils.cc | 29 +-
 paddle/pten/infershape/binary.cc | 6 +-
 paddle/pten/infershape/binary.h | 14 +-
 paddle/pten/infershape/unary.cc | 18 +-
 paddle/pten/infershape/unary.h | 21 +-
 paddle/pten/kernels/cpu/CMakeLists.txt | 2 +-
 paddle/pten/kernels/cpu/manipulation.cc | 6 +-
 paddle/pten/kernels/cpu/utils.cc | 3 +-
 paddle/pten/kernels/cuda/CMakeLists.txt | 4 +-
 paddle/pten/kernels/cuda/manipulation.cu | 6 +-
 paddle/pten/kernels/cuda/math.cu | 24 +-
 paddle/pten/kernels/cuda/utils.cu | 3 +-
 paddle/pten/kernels/functions/eigen/dot.h | 1 -
 paddle/pten/kernels/functions/eigen/mean.h | 2 -
 paddle/pten/tests/CMakeLists.txt | 10 +-
 paddle/pten/tests/dense_tensor_test.cc | 13 -
 paddle/pten/tests/test_copy_api.cc | 21 +-
 paddle/pten/tests/test_dot_api.cc | 21 +-
 paddle/pten/tests/test_fill_api.cc | 39 +--
 paddle/pten/tests/test_flatten_api.cc | 12 +-
 paddle/pten/tests/test_mean_api.cc | 12 +-
 51 files changed, 632 insertions(+), 1106 deletions(-)
 delete mode 100644 paddle/pten/core/candidate/CMakeLists.txt
 delete mode 100644 paddle/pten/core/candidate/dense_tensor.cc
 delete mode 100644 paddle/pten/core/candidate/dense_tensor.h

diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index 231105628dd7c..889925c6fdd39 100644
--- a/paddle/fluid/framework/CMakeLists.txt
+++ 
b/paddle/fluid/framework/CMakeLists.txt @@ -195,10 +195,12 @@ cc_library(unused_var_check SRCS unused_var_check.cc DEPS glog no_need_buffer_va IF(WITH_XPU) cc_library(operator SRCS operator.cc DEPS xpu_op_list op_info device_context tensor scope glog trainer_desc_proto data_feed_proto - shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils pten pten_utils) + shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils + pten pten_utils kernel_factory) ELSE() cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog trainer_desc_proto data_feed_proto - shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils pten pten_utils) + shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils + pten pten_utils kernel_factory) ENDIF() cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry device_context) @@ -392,7 +394,7 @@ cc_library(save_load_util SRCS save_load_util.cc DEPS tensor scope layer) cc_test(save_load_util_test SRCS save_load_util_test.cc DEPS save_load_util tensor scope layer) cc_library(generator SRCS generator.cc DEPS enforce place) -cc_library(pten_utils SRCS pten_utils.cc DEPS lod_tensor selected_rows place pten var_type_traits) +cc_library(pten_utils SRCS pten_utils.cc DEPS lod_tensor selected_rows place pten var_type_traits pten_hapi_utils) # Get the current working branch execute_process( diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 7c63f7c76c921..f8ec13f1d8b98 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -1819,10 +1819,10 @@ pten::KernelContext OperatorWithKernel::BuildPtenKernelContext( paddle::SmallVector> tmp_inputs; for (auto var : ins_vector) { - auto pt_in = framework::InputVariableToPtenTensor(*var, in_def); - tmp_inputs.emplace_back(pt_in); + tmp_inputs.emplace_back( + experimental::MakePtenTensorBaseFromVar(*var, in_def)); } - op_kernel_ctx.EmplaceBackInputs(tmp_inputs); + op_kernel_ctx.EmplaceBackInputs(std::move(tmp_inputs)); } for (size_t i = 0; i < output_names.size(); ++i) { @@ -1831,10 +1831,10 @@ pten::KernelContext OperatorWithKernel::BuildPtenKernelContext( paddle::SmallVector> tmp_outputs; for (auto var : outs_vector) { - auto pt_out = framework::OutputVariableToPtenTensor(var, out_def); - tmp_outputs.emplace_back(pt_out); + tmp_outputs.emplace_back( + experimental::MakePtenTensorBaseFromVar(var, out_def)); } - op_kernel_ctx.EmplaceBackOutputs(tmp_outputs); + op_kernel_ctx.EmplaceBackOutputs(std::move(tmp_outputs)); } for (size_t i = 0; i < attr_names.size(); ++i) { diff --git a/paddle/fluid/framework/pten_utils.cc b/paddle/fluid/framework/pten_utils.cc index 9dac142557ed4..96408afc100e9 100644 --- a/paddle/fluid/framework/pten_utils.cc +++ b/paddle/fluid/framework/pten_utils.cc @@ -24,148 +24,6 @@ limitations under the License. 
*/ namespace paddle { namespace framework { -// TODO(chenweihang, shixiaowei): adapt SelectedRows -template <> -std::shared_ptr MakeTensorImpl( - const LoDTensor& tensor, pten::Backend backend, - paddle::experimental::DataType dtype, - paddle::experimental::DataLayout layout) { - auto holder = tensor.Holder(); - auto tensor_impl = std::make_shared( - pten::TensorMeta(tensor.dims(), backend, dtype, layout, tensor.offset()), - pten::TensorStatus()); - - if (holder != nullptr) { - tensor_impl->ShareAllocation(tensor.Holder()); - } - return tensor_impl; -} - -template <> -std::shared_ptr MakeTensorImpl( - const Tensor& tensor, pten::Backend backend, - paddle::experimental::DataType dtype, - paddle::experimental::DataLayout layout) { - auto holder = tensor.Holder(); - auto tensor_impl = std::make_shared( - pten::TensorMeta(tensor.dims(), backend, dtype, layout, tensor.offset()), - pten::TensorStatus()); - - if (holder != nullptr) { - tensor_impl->ShareAllocation(tensor.Holder()); - } - return tensor_impl; -} - -template <> -std::shared_ptr MakeTensorImpl( - const LoDTensor& tensor, const platform::Place& place, - proto::VarType::Type type) { - return MakeTensorImpl( - tensor, pten::TransToPtenBackend(place), pten::TransToPtenDataType(type), - pten::TransToPtenDataLayout(tensor.layout())); -} - -template <> -std::shared_ptr MakeTensorImpl( - const Tensor& tensor, const platform::Place& place, - proto::VarType::Type type) { - return MakeTensorImpl( - tensor, pten::TransToPtenBackend(place), pten::TransToPtenDataType(type), - pten::TransToPtenDataLayout(tensor.layout())); -} - -template <> -void ShareTensorImpl(pten::DenseTensor* tensor_impl, - LoDTensor* out) { - out->ResetHolderWithType(tensor_impl->allocation(), - pten::TransToProtoVarType(tensor_impl->data_type())); -} - -template <> -void ShareTensorImpl(pten::DenseTensor* tensor_impl, - Tensor* out) { - out->ResetHolderWithType(tensor_impl->allocation(), - pten::TransToProtoVarType(tensor_impl->data_type())); -} - -std::shared_ptr InputVariableToPtenTensor( - const framework::Variable& variable, const pten::TensorArgDef& arg_def) { - auto expected_place = pten::TransToFluidPlace(arg_def.backend); - - if (variable.template IsType()) { - const auto& tensor = variable.template Get(); - if (!platform::is_same_place(tensor.place(), expected_place)) { - framework::LoDTensor tmp_tensor; - framework::TensorCopySync(tensor, expected_place, &tmp_tensor); - auto pt_in = - framework::MakeTensorImpl( - tmp_tensor, arg_def.backend, arg_def.dtype, arg_def.layout); - return pt_in; - } else { - auto pt_in = - framework::MakeTensorImpl( - tensor, arg_def.backend, arg_def.dtype, arg_def.layout); - return pt_in; - } - } else if (variable.template IsType()) { - // TODO(chenweihang): now we don't deal with row and height - // by xiaowei's advice - const auto& tensor = variable.template Get(); - if (!platform::is_same_place(tensor.value().place(), expected_place)) { - framework::Tensor tmp_tensor; - TensorCopySync(tensor.value(), expected_place, &tmp_tensor); - // TODO(chenweihang): adapt SelectedRows by xiaowei's design - auto pt_in = - framework::MakeTensorImpl( - tmp_tensor, arg_def.backend, arg_def.dtype, arg_def.layout); - return pt_in; - } else { - auto pt_in = - framework::MakeTensorImpl( - tensor.value(), arg_def.backend, arg_def.dtype, arg_def.layout); - return pt_in; - } - } else { - PADDLE_THROW(platform::errors::Unimplemented( - "Unsupported shared input `%s` type now when call pt kernel.", - framework::ToTypeName(variable.Type()))); - } - return 
nullptr; -} - -std::shared_ptr OutputVariableToPtenTensor( - framework::Variable* variable, const pten::TensorArgDef& arg_def) { - // mutable_data before run kernel, to avoid share output form - // KernelContext to original tensor - if (variable->template IsType()) { - auto* tensor = variable->template GetMutable(); - tensor->mutable_data(pten::TransToFluidPlace(arg_def.backend), - pten::TransToProtoVarType(arg_def.dtype)); - auto pt_out = - framework::MakeTensorImpl( - *tensor, arg_def.backend, arg_def.dtype, arg_def.layout); - return pt_out; - } else if (variable->template IsType()) { - auto* tensor = variable->template GetMutable(); - tensor->mutable_value()->mutable_data( - pten::TransToFluidPlace(arg_def.backend), - pten::TransToProtoVarType(arg_def.dtype)); - // TODO(chenweihang): adapt SelectedRows by xiaowei's design, - // here the row and height will lost in output! - auto pt_out = - framework::MakeTensorImpl( - tensor->value(), arg_def.backend, arg_def.dtype, arg_def.layout); - return pt_out; - } else { - PADDLE_THROW(platform::errors::Unimplemented( - "Unsupported shared output `%s` type now when call pt kernel.", - framework::ToTypeName(variable->Type()))); - } - - return nullptr; -} - OpKernelType TransPtenKernelKeyToOpKernelType( const pten::KernelKey& kernel_key) { proto::VarType::Type data_type = diff --git a/paddle/fluid/framework/pten_utils.h b/paddle/fluid/framework/pten_utils.h index 263101657ceb9..8c1c25b3b67cd 100644 --- a/paddle/fluid/framework/pten_utils.h +++ b/paddle/fluid/framework/pten_utils.h @@ -25,41 +25,13 @@ limitations under the License. */ #include "paddle/fluid/platform/macros.h" #include "paddle/fluid/platform/place.h" #include "paddle/pten/api/include/core.h" +#include "paddle/pten/hapi/lib/utils/tensor_utils.h" #include "paddle/utils/flat_hash_map.h" #include "paddle/utils/small_vector.h" namespace paddle { namespace framework { -/* tensor translate */ - -template -std::shared_ptr MakeTensorImpl( - const VariableT& tensor, pten::Backend backend, - paddle::experimental::DataType dtype, - paddle::experimental::DataLayout layout); - -template -std::shared_ptr MakeTensorImpl(const LoDTensor& tensor, - const platform::Place& place, - proto::VarType::Type type); - -template -std::shared_ptr MakeTensorImpl(const Tensor& tensor, - const platform::Place& place, - proto::VarType::Type type); - -template -void ShareTensorImpl(PtenTensorImplT* tensor_impl, LoDTensor* out); - -template -void ShareTensorImpl(PtenTensorImplT* tensor_impl, Tensor* out); - -std::shared_ptr InputVariableToPtenTensor( - const framework::Variable& variable, const pten::TensorArgDef& arg_def); -std::shared_ptr OutputVariableToPtenTensor( - framework::Variable* variable, const pten::TensorArgDef& arg_def); - /* Kernel Key translate */ OpKernelType TransPtenKernelKeyToOpKernelType( diff --git a/paddle/fluid/framework/pten_utils_test.cc b/paddle/fluid/framework/pten_utils_test.cc index 33c55a8086b4e..ab2d60a34303a 100644 --- a/paddle/fluid/framework/pten_utils_test.cc +++ b/paddle/fluid/framework/pten_utils_test.cc @@ -18,66 +18,6 @@ limitations under the License. */ #include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/framework/variable.h" -TEST(PtenUtils, FluidTensorToPtenTensor) { - // 1. create tensor - paddle::framework::LoDTensor x; - paddle::framework::Tensor x2; - x.Resize({2}); - x.mutable_data(paddle::platform::CPUPlace()); - x.data()[0] = 0.2; - x.data()[1] = 0.5; - - // 2. 
test API - auto dense_x = paddle::framework::MakeTensorImpl( - x, x.place(), x.type()); - - // 3. check result - std::vector expect_value = {0.2, 0.5}; - ASSERT_EQ(dense_x->data()[0], expect_value[0]); - ASSERT_EQ(dense_x->data()[1], expect_value[1]); - ASSERT_EQ(dense_x->backend(), pten::Backend::CPU); - ASSERT_EQ(dense_x->data_type(), pten::DataType::FLOAT32); -} - -TEST(PtenUtils, VarToPtenTensor) { - // 1. create Variable - paddle::framework::Variable v; - auto selected_rows = v.GetMutable(); - paddle::framework::Tensor* value = selected_rows->mutable_value(); - auto* data = value->mutable_data(paddle::framework::make_ddim({1, 1}), - paddle::platform::CPUPlace()); - data[0] = 123; - pten::Backend expect_backend = pten::Backend::CPU; - -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - expect_backend = pten::Backend::CUDA; -#endif - auto tensor_def = pten::TensorArgDef(expect_backend, pten::DataLayout::NCHW, - pten::DataType::INT32); - // 2. test API - auto tensor_x = paddle::framework::InputVariableToPtenTensor(v, tensor_def); - // 3. check result - ASSERT_EQ(tensor_x->backend(), expect_backend); - ASSERT_EQ(tensor_x->data_type(), pten::DataType::INT32); -} - -TEST(PtenUtils, PtenTensorToFluidTensor) { - pten::DenseTensor dense_tensor( - pten::TensorMeta(paddle::framework::make_ddim({1, 1}), pten::Backend::CPU, - pten::DataType::FLOAT32, pten::DataLayout::ANY), - pten::TensorStatus()); - auto* data_ptr = dense_tensor.mutable_data(); - data_ptr[0] = 0.5; - // share allocation into fluid Tensor - paddle::framework::Tensor tensor; - paddle::framework::LoDTensor lod_tensor; - paddle::framework::ShareTensorImpl(&dense_tensor, &tensor); - paddle::framework::ShareTensorImpl(&dense_tensor, &lod_tensor); - // compare - ASSERT_EQ(tensor.data()[0], 0.5); - ASSERT_EQ(lod_tensor.data()[0], 0.5); -} - TEST(PtenUtils, TransPtenKernelKeyToOpKernelType) { pten::KernelKey kernel_key(pten::Backend::CPU, pten::DataLayout::NCHW, pten::DataType::FLOAT32); diff --git a/paddle/fluid/imperative/prepared_operator.cc b/paddle/fluid/imperative/prepared_operator.cc index 2ffb47273f650..f2251e34fb029 100644 --- a/paddle/fluid/imperative/prepared_operator.cc +++ b/paddle/fluid/imperative/prepared_operator.cc @@ -293,11 +293,10 @@ static pten::KernelContext BuildDygraphPtenKernelContext( paddle::SmallVector> tmp_inputs; for (auto var : ins_vector) { const auto& variable = var->Var(); - - auto pt_in = framework::InputVariableToPtenTensor(variable, in_def); - tmp_inputs.emplace_back(pt_in); + tmp_inputs.emplace_back( + experimental::MakePtenTensorBaseFromVar(variable, in_def)); } - op_kernel_ctx.EmplaceBackInputs(tmp_inputs); + op_kernel_ctx.EmplaceBackInputs(std::move(tmp_inputs)); } for (size_t i = 0; i < output_names.size(); ++i) { @@ -307,11 +306,10 @@ static pten::KernelContext BuildDygraphPtenKernelContext( paddle::SmallVector> tmp_outputs; for (auto var : outs_vector) { auto* variable = var->MutableVar(); - - auto pt_out = framework::OutputVariableToPtenTensor(variable, out_def); - tmp_outputs.emplace_back(pt_out); + tmp_outputs.emplace_back( + experimental::MakePtenTensorBaseFromVar(variable, out_def)); } - op_kernel_ctx.EmplaceBackOutputs(tmp_outputs); + op_kernel_ctx.EmplaceBackOutputs(std::move(tmp_outputs)); } for (size_t i = 0; i < attr_names.size(); ++i) { diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index adbd9bf277b11..bafc650c433db 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -80,8 +80,9 @@ 
if(WITH_UNITY_BUILD) endif() set(OP_HEADER_DEPS ${OP_HEADER_DEPS} pten) -set(OP_HEADER_DEPS ${OP_HEADER_DEPS} pten_utils) -register_operators(EXCLUDES py_layer_op py_func_op warpctc_op dgc_op load_combine_op lstm_op run_program_op eye_op +#set(OP_HEADER_DEPS ${OP_HEADER_DEPS} pten_utils) +register_operators(EXCLUDES +py_layer_op py_func_op warpctc_op dgc_op load_combine_op lstm_op run_program_op eye_op recurrent_op save_combine_op sparse_attention_op sync_batch_norm_op spectral_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS}) op_library(run_program_op SRCS run_program_op.cc run_program_op.cu.cc DEPS executor_cache ${OP_HEADER_DEPS}) diff --git a/paddle/fluid/operators/dot_op.h b/paddle/fluid/operators/dot_op.h index 641b0d653d5b0..6a025fdd9ccc6 100644 --- a/paddle/fluid/operators/dot_op.h +++ b/paddle/fluid/operators/dot_op.h @@ -16,13 +16,13 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/pten_utils.h" #include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/platform/for_range.h" // only can include the headers in paddle/pten/api dirs #include "paddle/pten/api/include/core.h" #include "paddle/pten/api/include/linalg.h" +#include "paddle/pten/hapi/lib/utils/tensor_utils.h" namespace paddle { namespace operators { @@ -244,12 +244,9 @@ class DotKernel : public framework::OpKernel { auto& dev_ctx = ctx.device_context(); out->mutable_data(x->place()); - auto pt_x = - framework::MakeTensorImpl(*x, x->place(), x->type()); - auto pt_y = - framework::MakeTensorImpl(*y, y->place(), y->type()); - auto pt_out = framework::MakeTensorImpl(*out, x->place(), - x->type()); + auto pt_x = paddle::experimental::MakePtenDenseTensor(*x); + auto pt_y = paddle::experimental::MakePtenDenseTensor(*y); + auto pt_out = paddle::experimental::MakePtenDenseTensor(*out); // call new kernel pten::Dot(dev_ctx, *pt_x.get(), *pt_y.get(), pt_out.get()); diff --git a/paddle/fluid/operators/fill_any_like_op.h b/paddle/fluid/operators/fill_any_like_op.h index 73170c6e2e277..fc649f42c51a1 100644 --- a/paddle/fluid/operators/fill_any_like_op.h +++ b/paddle/fluid/operators/fill_any_like_op.h @@ -62,10 +62,8 @@ class FillAnyLikeKernel : public framework::OpKernel { std::isnan(value), false, platform::errors::InvalidArgument("The filled value is NaN.")); - auto pt_x = framework::MakeTensorImpl(*in, in->place(), - in->type()); - auto pt_out = framework::MakeTensorImpl( - *out, out->place(), out->type()); + auto pt_x = paddle::experimental::MakePtenDenseTensor(*in); + auto pt_out = paddle::experimental::MakePtenDenseTensor(*out); const auto& dev_ctx = context.template device_context(); // call new kernel diff --git a/paddle/fluid/operators/mean_op.cu b/paddle/fluid/operators/mean_op.cu index ffb667ba974b8..26c844392d4d7 100644 --- a/paddle/fluid/operators/mean_op.cu +++ b/paddle/fluid/operators/mean_op.cu @@ -62,6 +62,7 @@ class MeanCUDAGradKernel : public framework::OpKernel { namespace ops = paddle::operators; namespace plat = paddle::platform; + REGISTER_OP_CUDA_KERNEL( mean, ops::MeanKernel, ops::MeanKernel, diff --git a/paddle/fluid/operators/mean_op.h b/paddle/fluid/operators/mean_op.h index 9a8c2736589c9..9d9954a8412a3 100644 --- a/paddle/fluid/operators/mean_op.h +++ b/paddle/fluid/operators/mean_op.h @@ -20,6 +20,7 @@ limitations under the License. 
*/ // only can include the headers in paddle/top/api dirs #include "paddle/pten/api/include/core.h" #include "paddle/pten/api/include/math.h" +#include "paddle/pten/hapi/lib/utils/tensor_utils.h" namespace paddle { namespace operators { @@ -61,10 +62,8 @@ class MeanKernel : public framework::OpKernel { auto& dev_ctx = context.device_context(); out->mutable_data(x->place()); - auto pt_x = - framework::MakeTensorImpl(*x, x->place(), x->type()); - auto pt_out = framework::MakeTensorImpl(*out, x->place(), - x->type()); + auto pt_x = paddle::experimental::MakePtenDenseTensor(*x); + auto pt_out = paddle::experimental::MakePtenDenseTensor(*out); // call new kernel VLOG(1) << "chenweihang: call original mean kernel compute."; diff --git a/paddle/fluid/operators/scale_op.h b/paddle/fluid/operators/scale_op.h index 9a043361678b2..0d7113a6f4de9 100644 --- a/paddle/fluid/operators/scale_op.h +++ b/paddle/fluid/operators/scale_op.h @@ -20,6 +20,7 @@ limitations under the License. */ // only can include the headers in paddle/top/api dirs #include "paddle/pten/api/include/core.h" #include "paddle/pten/api/include/math.h" +#include "paddle/pten/hapi/lib/utils/tensor_utils.h" namespace paddle { namespace operators { @@ -60,16 +61,13 @@ class ScaleKernel : public framework::OpKernel { out_slr->set_rows(in_slr.rows()); out_slr->set_height(in_slr.height()); } - auto* out = framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(out_var); out->mutable_data(in->place()); auto& dev_ctx = ctx.device_context(); - auto pt_x = framework::MakeTensorImpl(*in, in->place(), - in->type()); - auto pt_out = framework::MakeTensorImpl( - *out, in->place(), in->type()); + auto pt_x = paddle::experimental::MakePtenDenseTensor(*in); + auto pt_out = paddle::experimental::MakePtenDenseTensor(*out); // call new kernel pten::Scale(dev_ctx, *pt_x.get(), scale, bias, bias_after_scale, diff --git a/paddle/fluid/operators/sign_op.h b/paddle/fluid/operators/sign_op.h index f3083f4937875..0e3036115e3c1 100644 --- a/paddle/fluid/operators/sign_op.h +++ b/paddle/fluid/operators/sign_op.h @@ -36,10 +36,8 @@ class SignKernel : public framework::OpKernel { auto& dev_ctx = context.device_context(); out->mutable_data(x->place()); - auto pt_x = - framework::MakeTensorImpl(*x, x->place(), x->type()); - auto pt_out = framework::MakeTensorImpl(*out, x->place(), - x->type()); + auto pt_x = paddle::experimental::MakePtenDenseTensor(*x); + auto pt_out = paddle::experimental::MakePtenDenseTensor(*out); // call new kernel pten::Sign(dev_ctx, *pt_x.get(), pt_out.get()); diff --git a/paddle/pten/common/data_type.h b/paddle/pten/common/data_type.h index f5383da31cf93..27ca28b273485 100644 --- a/paddle/pten/common/data_type.h +++ b/paddle/pten/common/data_type.h @@ -54,6 +54,7 @@ inline size_t SizeOf(DataType data_type) { case DataType::UINT8: case DataType::INT8: return 1; + case DataType::BFLOAT16: case DataType::FLOAT16: case DataType::INT16: case DataType::UINT16: @@ -65,11 +66,11 @@ inline size_t SizeOf(DataType data_type) { case DataType::FLOAT64: case DataType::INT64: case DataType::UINT64: - return 8; - case DataType::UNDEFINED: - case DataType::BFLOAT16: case DataType::COMPLEX64: + return 8; case DataType::COMPLEX128: + return 16; + case DataType::UNDEFINED: case DataType::NUM_DATA_TYPES: PADDLE_THROW(platform::errors::Unimplemented( "Data type %d is not supported by tensor.", @@ -138,12 +139,21 @@ inline std::ostream& operator<<(std::ostream& os, DataType dtype) { case DataType::INT16: os << "int16"; break; + case DataType::UINT16: + os << 
"uint16"; + break; case DataType::INT32: os << "int32"; break; + case DataType::UINT32: + os << "uint32"; + break; case DataType::INT64: os << "int64"; break; + case DataType::UINT64: + os << "uint64"; + break; case DataType::BFLOAT16: os << "bfloat16"; break; diff --git a/paddle/pten/core/CMakeLists.txt b/paddle/pten/core/CMakeLists.txt index ca562332bb79f..a7ccf31467438 100644 --- a/paddle/pten/core/CMakeLists.txt +++ b/paddle/pten/core/CMakeLists.txt @@ -1,5 +1,3 @@ -add_subdirectory(candidate) - IF(WITH_MKLDNN) set(MKLDNN_CTX_DEPS mkldnn) ELSE() @@ -7,15 +5,15 @@ ELSE() ENDIF() if(WITH_GPU) - cc_library(convert_utils SRCS convert_utils.cc DEPS data_type place gpu_info) + cc_library(convert_utils SRCS convert_utils.cc DEPS data_type place gpu_info) elseif(WITH_ROCM) - cc_library(convert_utils SRCS convert_utils.cc DEPS data_type place gpu_info) + cc_library(convert_utils SRCS convert_utils.cc DEPS data_type place gpu_info) else() - cc_library(convert_utils SRCS convert_utils.cc DEPS data_type place) + cc_library(convert_utils SRCS convert_utils.cc DEPS data_type place) endif() -cc_library(dense_tensor SRCS dense_tensor.cc DEPS enforce data_type ddim allocator place convert_utils ${MKLDNN_CTX_DEPS}) cc_library(kernel_factory SRCS kernel_factory.cc DEPS enforce) cc_library(kernel_context SRCS kernel_context.cc DEPS enforce device_context) cc_library(tensor_base SRCS tensor_base.cc allocator.cc storage.cc DEPS enforce) +cc_library(dense_tensor SRCS dense_tensor.cc DEPS tensor_base) diff --git a/paddle/pten/core/candidate/CMakeLists.txt b/paddle/pten/core/candidate/CMakeLists.txt deleted file mode 100644 index dd670abdba1c1..0000000000000 --- a/paddle/pten/core/candidate/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -cc_library(pten_dense_tensor SRCS dense_tensor.cc DEPS tensor_base) diff --git a/paddle/pten/core/candidate/dense_tensor.cc b/paddle/pten/core/candidate/dense_tensor.cc deleted file mode 100644 index 325edd1ba077f..0000000000000 --- a/paddle/pten/core/candidate/dense_tensor.cc +++ /dev/null @@ -1,145 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/pten/core/candidate/dense_tensor.h" - -namespace pten { -namespace candidate { - -DenseTensorMeta::DenseTensorMeta(DataType type, const DDim& dims) - : dims(dims), type(type) {} -DenseTensorMeta::DenseTensorMeta(DataType type, - const DDim& dims, - DataLayout layout) - : dims(dims), type(type), layout(layout) {} -DenseTensorMeta::DenseTensorMeta(DataType type, - const DDim& dims, - DataLayout layout, - const std::vector>& lod) - : dims(dims), type(type), layout(layout), lod(lod) {} - -bool DenseTensorMeta::valid() const noexcept { - bool valid{true}; - valid = valid && (type != DataType::UNDEFINED); - valid = valid && (layout != DataLayout::UNDEFINED); - valid = valid && (is_scalar || product(dims)); - return valid; -} - -DenseTensor::DenseTensor(const std::shared_ptr& a, - const DenseTensorMeta& meta) - : meta_(meta), - storage_( - make_intrusive(a, SizeOf(data_type()) * numel())) {} - -DenseTensor::DenseTensor(const std::shared_ptr& a, - DenseTensorMeta&& meta) - : meta_(std::move(meta)), - storage_( - make_intrusive(a, SizeOf(data_type()) * numel())) {} - -DenseTensor::DenseTensor(intrusive_ptr storage, - const DenseTensorMeta& meta) - : meta_(meta), storage_(std::move(storage)) {} - -DenseTensor::DenseTensor(intrusive_ptr storage, DenseTensorMeta&& meta) - : meta_(std::move(meta)), storage_(std::move(storage)) {} - -int64_t DenseTensor::numel() const { - if (meta_.is_scalar) { - return 1; - } - return product(meta_.dims); -} - -bool DenseTensor::SharesStorageWith(const DenseTensor& b) const { - return storage_.get() == b.storage_.get() && storage_.get() != nullptr; -} - -template -T* DenseTensor::mutable_data(size_t request_bytes) { - PADDLE_ENFORCE( - valid(), - paddle::platform::errors::PreconditionNotMet( - "The meta data must be valid when call the mutable data function.")); - PADDLE_ENFORCE_NOT_NULL( - storage_, - paddle::platform::errors::PreconditionNotMet( - "The storage must be valid when call the mutable data function.")); - PADDLE_ENFORCE( - (data_type() == paddle::experimental::CppTypeToDataType::Type()), - paddle::platform::errors::PreconditionNotMet( - "The type of data we are trying to retrieve does not match the " - "type of data currently contained in the container.")); - size_t bytes = numel() * SizeOf(data_type()); - if (request_bytes) { - PADDLE_ENFORCE_GE(request_bytes, - bytes, - paddle::platform::errors::InvalidArgument( - "The reserved size %d should be enough to meet the " - "volume required by metadata %d.", - request_bytes, - bytes)); - bytes = request_bytes; - } - if (storage_->size() < bytes) { - storage_->Realloc(bytes); - } - return static_cast(storage_->data()); -} - -template -const T* DenseTensor::data() const { - PADDLE_ENFORCE_NOT_NULL( - storage_, - paddle::platform::errors::PreconditionNotMet( - "The storage must be valid when call the mutable data function.")); - PADDLE_ENFORCE( - (data_type() == paddle::experimental::CppTypeToDataType::Type()), - paddle::platform::errors::PreconditionNotMet( - "The type of data we are trying to retrieve does not match the " - "type of data currently contained in the container.")); - return static_cast(storage_->data()); -} - -void DenseTensor::check_memory_size() const { - size_t bytes = numel() * SizeOf(data_type()); - PADDLE_ENFORCE_GE(memory_size(), - bytes, - paddle::platform::errors::InvalidArgument( - "The memory size %d should be enough to meet the " - "volume required by metadata %d.", - memory_size(), - bytes)); -} - -#define DATA_MEMBER_FUNC_INSTANTIATION(dtype) \ - template 
dtype* DenseTensor::mutable_data(size_t request_bytes); \ - template const dtype* DenseTensor::data() const; - -DATA_MEMBER_FUNC_INSTANTIATION(int8_t); -DATA_MEMBER_FUNC_INSTANTIATION(uint8_t); -DATA_MEMBER_FUNC_INSTANTIATION(int16_t); -DATA_MEMBER_FUNC_INSTANTIATION(uint16_t); -DATA_MEMBER_FUNC_INSTANTIATION(int32_t); -DATA_MEMBER_FUNC_INSTANTIATION(uint32_t); -DATA_MEMBER_FUNC_INSTANTIATION(int64_t); -DATA_MEMBER_FUNC_INSTANTIATION(uint64_t); -DATA_MEMBER_FUNC_INSTANTIATION(float); -DATA_MEMBER_FUNC_INSTANTIATION(double); - -#undef DATA_MEMBER_FUNC_INSTANTIATION - -} // namespace candidate -} // namespace pten diff --git a/paddle/pten/core/candidate/dense_tensor.h b/paddle/pten/core/candidate/dense_tensor.h deleted file mode 100644 index 21a093439529f..0000000000000 --- a/paddle/pten/core/candidate/dense_tensor.h +++ /dev/null @@ -1,188 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "paddle/fluid/framework/ddim.h" -#include "paddle/fluid/platform/place.h" -#include "paddle/pten/common/data_type.h" -#include "paddle/pten/core/allocator.h" -#include "paddle/pten/core/storage.h" -#include "paddle/pten/core/tensor_base.h" - -namespace pten { -namespace candidate { - -using DDim = paddle::framework::DDim; - -/// \brief The meta data of dense tensor. Take the structure type -/// and use all default operations. -/// -struct DenseTensorMeta { - using DataType = paddle::experimental::DataType; - using DataLayout = paddle::experimental::DataLayout; - - DenseTensorMeta() = default; - DenseTensorMeta(DataType type, const DDim& dims); - DenseTensorMeta(DataType type, const DDim& dims, DataLayout layout); - DenseTensorMeta(DataType type, - const DDim& dims, - DataLayout layout, - const std::vector>& lod); - - /// \brief Test whether the metadata is valid. Does not throw exceptions. - /// \return Whether the metadata is valid. - bool valid() const noexcept; - - /// During the entire life cycle of a DenseTensor, the following attributes - /// marked with `const` are expected to remain unchanged. - const bool is_scalar{false}; - DDim dims; - const DataType type{DataType::FLOAT32}; - const DataLayout layout{DataLayout::NCHW}; - std::vector> lod; -}; - -/// \brief The Dense tensor store values in a contiguous sequential block -/// of memory where all values are represented. Tensors or multi-dimensional -/// arrays are used in math operators. -/// During the entire life cycle of a DenseTensor, its device type and key -/// metadata are set unchanged. -class DenseTensor : public TensorBase, - public TypeInfoTraits { - public: - /// \brief Construct a dense tensor and allocate space. - /// \param a The allocator used to allocate space. - /// \param meta The meta data of dense tensor. - DenseTensor(const std::shared_ptr& a, const DenseTensorMeta& meta); - - /// \brief Construct a dense tensor and allocate space. - /// \param a The allocator used to allocate space. - /// \param meta The meta data of dense tensor. 
- DenseTensor(const std::shared_ptr& a, DenseTensorMeta&& meta); - - /// \brief Use existing storage space to create dense tensor. This interface - /// can be used to deliberately create an uninitialized dense tensor. - /// \param storage The existing storage. - /// \param meta The meta data of dense tensor. - DenseTensor(intrusive_ptr storage, const DenseTensorMeta& meta); - - /// \brief Use existing storage space to create dense tensor. This interface - /// can be used to deliberately create an uninitialized dense tensor. - /// \param storage The existing storage. - /// \param meta The meta data of dense tensor. - DenseTensor(intrusive_ptr storage, DenseTensorMeta&& meta); - - /// \brief Because dense tensor is a kind of container, we give a default - /// constructor to use for stl container. But the dense tensor created with - /// the default constructor is not practical. - DenseTensor() = default; - - /// \brief Because dense tensor is a resource handle, we provide a default - /// move constructor to support move semantics. - DenseTensor(DenseTensor&& other) = default; - - /// \brief We do not recommend deep copy of dense tensor because of its - /// efficiency and complexity across devices. The operation is disabled here. - DenseTensor(const DenseTensor& other) = delete; - - /// \brief Destroy the tensor object and release exclusive resources. - virtual ~DenseTensor() = default; - - public: - /// \brief Returns the name of the class for type traits. - /// \return The name of the class. - static const char* name() { return "DenseTensor"; } - - /// \brief Returns the number of elements contained in tensor. - /// \return The number of elements contained in tensor. - int64_t numel() const; - - /// \brief Returns the dims of the tensor. - /// \return The dims of the tensor. - const DDim& dims() const noexcept { return meta_.dims; } - - /// \brief Returns the lod of the tensor. - /// \return The lod of the tensor. - const std::vector>& lod() const noexcept { - return meta_.lod; - } - - /// \brief Returns the data type of the tensor. - /// \return The data type of the tensor. - DataType data_type() const noexcept { return meta_.type; } - - /// \brief Returns the data layout of the tensor. - /// \return The data layout of the tensor. - DataLayout layout() const noexcept { return meta_.layout; } - - /// \brief Returns the data place of the tensor. - /// \return The data place of the tensor. - const Place& place() const { return storage_->place(); } - - /// \brief Test whether the metadata is valid. - /// \return Whether the metadata is valid. - bool valid() const noexcept { return meta_.valid(); } - - /// \brief Test whether the storage is allocated. - /// return Whether the storage is allocated. - bool initialized() const { return storage_->data(); } - - /// \brief Check if storage is shared with other objects. - /// \return Whether the storage is shared with other objects. - bool SharesStorageWith(const DenseTensor& b) const; - - /// \brief Change the dims information in the metadata, and the corresponding - /// memory allocation will occur when the `mutable_data` is called. - /// \param dims The new dims of the dense tensor. - void Resize(const DDim& dims) noexcept { meta_.dims = dims; } - - /// \brief Returns the actual storage size occupied by tensor, may be larger - /// than its shape dims. - /// \return The actual storage size occupied by tensor. 
- size_t memory_size() const { return storage_->size(); } - - /// \brief Check that the storage area is large enough to hold the data of the - /// metadata size, and throw an exception if the conditions are not met. - void check_memory_size() const; - - /// \brief Release the storage area for other purposes. Because of the - /// destruction of encapsulation, we do not support two dense tensors directly - /// sharing the same intrusive pointer. - /// \return The rvalue of instrusize pointer releated to the released storage. - intrusive_ptr release() { return std::move(storage_); } - - /// \brief Get the mutable data pointer value of type T. - /// Memory allocation may occur when calling this interface: - /// 1. When the storage size is not enough to meet the current shape of the - /// data. - /// 2. When more request_bytes parameters are used to reserve the data - /// storage. - /// param request_bytes The bytes to reserve the data storage. - /// \return The mutable data pointer value of type T. - template - T* mutable_data(size_t request_bytes = 0); - - /// \brief Get the const data pointer value of type T. - /// \return The const data pointer value of type T. - template - const T* data() const; - - private: - DenseTensorMeta meta_; - intrusive_ptr storage_; -}; - -} // namespace candidate -} // namespace pten diff --git a/paddle/pten/core/dense_tensor.cc b/paddle/pten/core/dense_tensor.cc index 0a11c8e7d1912..647ddea0b4e1b 100644 --- a/paddle/pten/core/dense_tensor.cc +++ b/paddle/pten/core/dense_tensor.cc @@ -13,114 +13,126 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/pten/core/dense_tensor.h" -#include "paddle/pten/core/convert_utils.h" - -// See Note [ Why still include the fluid headers? ] -#include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/memory/malloc.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/gpu_info.h" -#include "paddle/fluid/platform/place.h" namespace pten { -using CPUPlace = paddle::platform::CPUPlace; -using CUDAPlace = paddle::platform::CUDAPlace; -using CUDAPinnedPlace = paddle::platform::CUDAPinnedPlace; -using XPUPlace = paddle::platform::XPUPlace; -using NPUPlace = paddle::platform::NPUPlace; -using NPUPinnedPlace = paddle::platform::NPUPinnedPlace; +DenseTensor::DenseTensor(const std::shared_ptr& a, + const DenseTensorMeta& meta) + : meta_(meta), + storage_( + make_intrusive(a, SizeOf(data_type()) * numel())) {} -const paddle::platform::Place& DenseTensor::place() const { - PADDLE_ENFORCE_NOT_NULL( - allocation_, - paddle::platform::errors::PreconditionNotMet( - "Tensor not initialized yet when Tensor::place() is called.")); - return allocation_->place(); -} +DenseTensor::DenseTensor(const std::shared_ptr& a, + DenseTensorMeta&& meta) + : meta_(std::move(meta)), + storage_( + make_intrusive(a, SizeOf(data_type()) * numel())) {} + +DenseTensor::DenseTensor(intrusive_ptr storage, + const DenseTensorMeta& meta) + : meta_(meta), storage_(std::move(storage)) {} -//---------------------------------------------------------------- -// Inner methods +DenseTensor::DenseTensor(intrusive_ptr storage, DenseTensorMeta&& meta) + : meta_(std::move(meta)), storage_(std::move(storage)) {} -void DenseTensor::ShareAllocation( - const std::shared_ptr& allocation) { - // This operation can be very slow! - // std::shared_ptr reference count is atomic. increasing or decreasing - // the reference count requires atomic increment or decrement. 
- // This is hundred times slower than non-atomic increment/decrement - allocation_ = allocation; +int64_t DenseTensor::numel() const { + if (meta_.is_scalar) { + return 1; + } + return product(meta_.dims); +} + +bool DenseTensor::IsSharedWith(const DenseTensor& b) const { + return storage_.get() == b.storage_.get() && storage_.get() != nullptr; } -// TODO(chenweihang): Add other place branchs -paddle::platform::Place DenseTensor::GetPlaceByBackend() const { - switch (meta_.backend) { - case Backend::CPU: - return CPUPlace(); -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - case Backend::CUDA: - return CUDAPlace(paddle::platform::GetCurrentDeviceId()); -#endif - default: - PADDLE_THROW(paddle::platform::errors::Unimplemented( - "Unsupported Tensor backend.")); +void* DenseTensor::mutable_data(size_t request_bytes) { + PADDLE_ENFORCE( + valid(), + paddle::platform::errors::PreconditionNotMet( + "The meta data must be valid when call the mutable data function.")); + PADDLE_ENFORCE_NOT_NULL( + storage_, + paddle::platform::errors::PreconditionNotMet( + "The storage must be valid when call the mutable data function.")); + size_t bytes = numel() * SizeOf(data_type()); + if (request_bytes) { + PADDLE_ENFORCE_GE(request_bytes, + bytes, + paddle::platform::errors::InvalidArgument( + "The reserved size %d should be enough to meet the " + "volume required by metadata %d.", + request_bytes, + bytes)); + bytes = request_bytes; + } + if (storage_->size() < bytes) { + storage_->Realloc(bytes); } + return storage_->data(); } -size_t DenseTensor::MemorySize() const { - return allocation_ == nullptr ? 0UL : allocation_->size() - meta_.offset; +template +T* DenseTensor::mutable_data() { + PADDLE_ENFORCE( + (data_type() == paddle::experimental::CppTypeToDataType::Type()), + paddle::platform::errors::PreconditionNotMet( + "The type of data (%d) we are trying to retrieve does not match the " + "type of data currently contained in the container (%d).", + static_cast(paddle::experimental::CppTypeToDataType::Type()), + static_cast(data_type()))); + return static_cast(mutable_data()); } -void DenseTensor::CheckMemorySize() const { - PADDLE_ENFORCE_NOT_NULL(allocation_, - paddle::platform::errors::PreconditionNotMet( - "Tensor holds no memory. " - "Call Tensor::mutable_data firstly.")); - size_t size_of_type = - paddle::framework::SizeOfType(TransToProtoVarType(meta_.type)); - PADDLE_ENFORCE_LE( - numel() * size_of_type, - MemorySize(), +template +const T* DenseTensor::data() const { + PADDLE_ENFORCE( + (data_type() == paddle::experimental::CppTypeToDataType::Type()), paddle::platform::errors::PreconditionNotMet( - "Tensor's dimension is out of bound." - "Tensor's dimension must be equal or less than the size of its " - "memory." - "But received Tensor's dimension is d%, memory's size is %d.", - numel() * size_of_type, - MemorySize())); + "The type of data we are trying to retrieve does not match the " + "type of data currently contained in the container.")); + return static_cast(data()); } const void* DenseTensor::data() const { - CheckMemorySize(); - return reinterpret_cast( - reinterpret_cast(allocation_->ptr()) + meta_.offset); + PADDLE_ENFORCE_NOT_NULL( + storage_, + paddle::platform::errors::PreconditionNotMet( + "The storage must be valid when call the mutable data function.")); + return storage_->data(); } -void* DenseTensor::mutable_data() { - PADDLE_ENFORCE_GE( - numel(), - 0, - paddle::platform::errors::PreconditionNotMet( - "The Tensor's element number must be equal or greater than zero. 
" - "The Tensor's shape is [", - dims(), - "] now")); - size_t size = - numel() * paddle::framework::SizeOfType(TransToProtoVarType(meta_.type)); - auto place = GetPlaceByBackend(); - if (allocation_ == nullptr) { - allocation_.reset(); - allocation_ = paddle::memory::AllocShared(place, size); - } else { - if (!(allocation_->place() == place) || - allocation_->size() < size + meta_.offset) { - allocation_.reset(); - allocation_ = paddle::memory::AllocShared(place, size); - } else { - // do nothing - } - } - return reinterpret_cast( - reinterpret_cast(allocation_->ptr()) + meta_.offset); +void DenseTensor::check_memory_size() const { + size_t bytes = numel() * SizeOf(data_type()); + PADDLE_ENFORCE_GE(memory_size(), + bytes, + paddle::platform::errors::InvalidArgument( + "The memory size %d should be enough to meet the " + "volume required by metadata %d.", + memory_size(), + bytes)); } +#define DATA_MEMBER_FUNC_INSTANTIATION(dtype) \ + template dtype* DenseTensor::mutable_data(); \ + template const dtype* DenseTensor::data() const; + +DATA_MEMBER_FUNC_INSTANTIATION(bool); +DATA_MEMBER_FUNC_INSTANTIATION(int8_t); +DATA_MEMBER_FUNC_INSTANTIATION(uint8_t); +DATA_MEMBER_FUNC_INSTANTIATION(int16_t); +DATA_MEMBER_FUNC_INSTANTIATION(uint16_t); +DATA_MEMBER_FUNC_INSTANTIATION(int32_t); +DATA_MEMBER_FUNC_INSTANTIATION(uint32_t); +DATA_MEMBER_FUNC_INSTANTIATION(int64_t); +DATA_MEMBER_FUNC_INSTANTIATION(uint64_t); +DATA_MEMBER_FUNC_INSTANTIATION(::paddle::platform::bfloat16); +DATA_MEMBER_FUNC_INSTANTIATION(::paddle::platform::float16); +DATA_MEMBER_FUNC_INSTANTIATION(float); +DATA_MEMBER_FUNC_INSTANTIATION(double); +DATA_MEMBER_FUNC_INSTANTIATION(::paddle::experimental::complex64); +DATA_MEMBER_FUNC_INSTANTIATION(::paddle::experimental::complex128); + +#undef DATA_MEMBER_FUNC_INSTANTIATION + } // namespace pten diff --git a/paddle/pten/core/dense_tensor.h b/paddle/pten/core/dense_tensor.h index e913440a7e663..46932ecac2ad0 100644 --- a/paddle/pten/core/dense_tensor.h +++ b/paddle/pten/core/dense_tensor.h @@ -14,137 +14,159 @@ limitations under the License. */ #pragma once -#include - +#include "paddle/pten/core/allocator.h" +#include "paddle/pten/core/storage.h" #include "paddle/pten/core/tensor_base.h" #include "paddle/pten/core/tensor_meta.h" -#include "paddle/pten/core/tensor_status.h" - -namespace paddle { -namespace memory { -namespace allocation { -class Allocation; -} -} -} namespace pten { -using DataType = paddle::experimental::DataType; - -/** - * The implementation of general Tensor (For CPU, CUDA, HIP, etc.), similar - * to the Tensor in fluid, contains a pointer to Allocation and a series of - * descriptive metadata and status required by Tensor. - * - * DenseTensor is still a base class, it may have inherited classes. - * - * The memory layout of these inherited classes is consistent with the - * basic DenseTensor, except that a small number of members are added to - * further specialize the description of the tensor. - * - * If the memory layout is different, it cannot be described based on the - * general Allocation, and it needs to be directly inherited from - * TensorBase. - */ -class DenseTensor : public TensorBase { +/// \brief The Dense tensor store values in a contiguous sequential block +/// of memory where all values are represented. Tensors or multi-dimensional +/// arrays are used in math operators. +/// During the entire life cycle of a DenseTensor, its device type and key +/// metadata are set unchanged. 
+class DenseTensor : public TensorBase, + public TypeInfoTraits { public: - // Not allowed to initialize a tensor without descriptive metadata - DenseTensor() = delete; - - // DenseTensor(const DenseTensor&) = delete; - // DenseTensor& operator=(const DenseTensor&) = delete; - DenseTensor(DenseTensor&&) = delete; - DenseTensor& operator=(DenseTensor&&) = delete; - - /** - * If we still malloc memory by mutable_data, - * the DenseTensor doesn't need complicated constructor. - * - * Note: Tensor objects lacking meta information are not allowed to exist. - */ - DenseTensor(const TensorMeta& meta, const TensorStatus& status) - : meta_(meta), status_(status) {} - - DenseTensor(TensorMeta&& meta, TensorStatus&& status) - : meta_(std::move(meta)), status_(std::move(status)) {} - - int64_t numel() const override { return meta_.numel; } - - const paddle::framework::DDim& dims() const override { return meta_.dims; } - - DataType data_type() const override { return meta_.type; } + /// \brief Construct a dense tensor and allocate space. + /// \param a The allocator used to allocate space. + /// \param meta The meta data of dense tensor. + DenseTensor(const std::shared_ptr& a, const DenseTensorMeta& meta); + + /// \brief Construct a dense tensor and allocate space. + /// \param a The allocator used to allocate space. + /// \param meta The meta data of dense tensor. + DenseTensor(const std::shared_ptr& a, DenseTensorMeta&& meta); + + /// \brief Use existing storage space to create dense tensor. This interface + /// can be used to deliberately create an uninitialized dense tensor. + /// \param storage The existing storage. + /// \param meta The meta data of dense tensor. + DenseTensor(intrusive_ptr storage, const DenseTensorMeta& meta); + + /// \brief Use existing storage space to create dense tensor. This interface + /// can be used to deliberately create an uninitialized dense tensor. + /// \param storage The existing storage. + /// \param meta The meta data of dense tensor. + DenseTensor(intrusive_ptr storage, DenseTensorMeta&& meta); + + /// \brief Because dense tensor is a kind of container, we give a default + /// constructor to use for stl container. But the dense tensor created with + /// the default constructor is not practical. + DenseTensor() = default; + + /// \brief Because dense tensor is a resource handle, we provide a default + /// move constructor to support move semantics. + DenseTensor(DenseTensor&& other) = default; + + /// \brief We do not recommend deep copy of dense tensor because of its + /// efficiency and complexity across devices. The operation is disabled here. + DenseTensor(const DenseTensor& other) = delete; + + /// \brief Destroy the tensor object and release exclusive resources. + virtual ~DenseTensor() = default; - DataLayout layout() const override { return meta_.layout; } - - const paddle::platform::Place& place() const override; - - Backend backend() const override { return meta_.backend; } - - bool valid() const override { return allocation_ != nullptr; } - - bool initialized() const override { return allocation_ != nullptr; } - - /* member methods */ - - const std::shared_ptr& allocation() - const { - return allocation_; + public: + /// \brief Returns the name of the class for type traits. + /// \return The name of the class. + static const char* name() { return "DenseTensor"; } + + /// \brief Returns the number of elements contained in tensor. + /// \return The number of elements contained in tensor. 
+ int64_t numel() const; + + /// \brief Returns the dims of the tensor. + /// \return The dims of the tensor. + const DDim& dims() const noexcept { return meta_.dims; } + + /// \brief Returns the lod of the tensor. + /// \return The lod of the tensor. + const std::vector>& lod() const noexcept { + return meta_.lod; } - const TensorMeta& meta() const { return meta_; } - - TensorMeta* mutable_meta() { return &meta_; } - - /* Data Access Methods */ - - const void* data() const; - - void* mutable_data(); - + /// \brief Set the lod of the tensor. + void set_lod(const std::vector>& lod) { meta_.lod = lod; } + + /// \brief Returns the data type of the tensor. + /// \return The data type of the tensor. + DataType data_type() const noexcept { return meta_.type; } + + /// \brief Returns the data layout of the tensor. + /// \return The data layout of the tensor. + DataLayout layout() const noexcept { return meta_.layout; } + + /// \brief Returns the data place of the tensor. + /// \return The data place of the tensor. + const Place& place() const { return storage_->place(); } + + /// \brief Returns the meta information of the tensor. + /// \return The meta information of the tensor. + const DenseTensorMeta& meta() const noexcept { return meta_; } + + /// \brief Test whether the metadata is valid. + /// \return Whether the metadata is valid. + bool valid() const noexcept { return meta_.valid(); } + + /// \brief Test whether the storage is allocated. + /// return Whether the storage is allocated. + bool initialized() const { return storage_->data(); } + + /// \brief Check if storage is shared with other objects. + /// \return Whether the storage is shared with other objects. + bool IsSharedWith(const DenseTensor& b) const; + + /// \brief Change the dims information in the metadata, and the corresponding + /// memory allocation will occur when the `mutable_data` is called. + /// \param dims The new dims of the dense tensor. + void Resize(const DDim& dims) noexcept { meta_.dims = dims; } + + /// \brief Returns the actual storage size occupied by tensor, may be larger + /// than its shape dims. + /// \return The actual storage size occupied by tensor. + size_t memory_size() const { return storage_->size(); } + + /// \brief Check that the storage area is large enough to hold the data of the + /// metadata size, and throw an exception if the conditions are not met. + void check_memory_size() const; + + /// \brief Release the storage area for other purposes. Because of the + /// destruction of encapsulation, we do not support two dense tensors directly + /// sharing the same intrusive pointer. + /// \return The rvalue of instrusize pointer releated to the released storage. + intrusive_ptr release() { return std::move(storage_); } + + /// \brief Get the mutable data pointer value of type T. + /// Memory allocation may occur when calling this interface: + /// 1. When the storage size is not enough to meet the current shape of the + /// data. + /// \return The mutable data pointer value of type T. template - const T* data() const { - static_assert(std::is_pod::value || std::is_same::value, - "T must be POD when call Tensor.data()."); - return reinterpret_cast(data()); - } - - // NOTE: mutable_data does not hold arguments. Before calling mutable_data, - // please make sure that Tensor has maintained - // the correct meta and status. - // - // TODO(chenweihang): We need to be able to specify the allocator when - // mutable_data, or directly remove the mutable_data method. 
- // DenseTensor cannot actively apply for memory. Its memory application is - // handled by the DeviceContext->AllocateTensorData interface. - // I prefer the latter + T* mutable_data(); + + /// \brief Get the mutable data pointer value of raw type. + /// Memory allocation may occur when calling this interface: + /// 1. When the storage size is not enough to meet the current shape of the + /// data. + /// 2. When more request_bytes parameters are used to reserve the data + /// storage. + /// param request_bytes The bytes to reserve the data storage. + /// \return The mutable data pointer value of type T. + void* mutable_data(size_t request_bytes = 0); + + /// \brief Get the const data pointer value of type T. + /// \return The const data pointer value of type T. template - T* mutable_data() { - static_assert(std::is_pod::value, - "T must be POD when call Tensor.mutable_data()."); - return reinterpret_cast(mutable_data()); - } - - // For non-API and non-member interfaces, we still follow the C++ code style? - - void Resize(const DDim& dims) { meta_.dims = dims; } - - void ShareAllocation(const std::shared_ptr< - paddle::memory::allocation::Allocation>& allocation); + const T* data() const; - paddle::platform::Place GetPlaceByBackend() const; - - size_t MemorySize() const; - - void CheckMemorySize() const; + /// \brief Get the const data pointer value of raw type. + /// \return The const data pointer value of raw type. + const void* data() const; private: - // The actual Tensor storage holder - std::shared_ptr allocation_; - // The Tensor meta data - TensorMeta meta_; - // The Tensor status data - TensorStatus status_; + DenseTensorMeta meta_; + intrusive_ptr storage_; }; } // namespace pten diff --git a/paddle/pten/core/tensor_base.h b/paddle/pten/core/tensor_base.h index 74cc082646fe2..79fd742aea10b 100644 --- a/paddle/pten/core/tensor_base.h +++ b/paddle/pten/core/tensor_base.h @@ -61,8 +61,6 @@ class TensorBase { /// return Whether the storage is allocated. virtual bool initialized() const = 0; - virtual paddle::experimental::Backend backend() const { return {}; } - /// \brief Return the type information of the derived class to support /// safely downcast in non-rtti environment. /// return The type information of the derived class. diff --git a/paddle/pten/core/tensor_meta.h b/paddle/pten/core/tensor_meta.h index 8783ee584faf6..b4452a644f152 100644 --- a/paddle/pten/core/tensor_meta.h +++ b/paddle/pten/core/tensor_meta.h @@ -28,114 +28,58 @@ limitations under the License. */ namespace pten { -// template -// using Vector = paddle::framework::Vector; - -/* - * LoD is short for Level of Details. - * - * - in a level, each element indicates relative offset of the lower level - * - the first element should be 0 and that indicates that this sequence start - * from 0 - * - each sequence's begin and end(no-inclusive) is level[id, id+1] - * - * For example: - * 3-level LoD stores - * - * 0 2 3 - * 0 2 4 7 - * 0 2 5 7 10 12 15 20 - */ -// using LoD = std::vector>; -using LoD = std::vector>; using DDim = paddle::framework::DDim; -/** - * The Meta data member of DenseTensor. - * - * Here the `meta` represents information describing the basic features and - * data features of Tensor, and does not include the status information of - * Tensor - * - * Note: TensorMeta is a struct, the members are named like - * ordinary nonmember variables, such as `type` instead of `type_`. 
- * And we direct access its members, in addition to constructor, destructor - * and functions for setting data members, can not provide other functions. - */ -struct TensorMeta { - TensorMeta() = delete; - TensorMeta& operator=(const TensorMeta&) = delete; - TensorMeta& operator=(TensorMeta&&) = delete; - - TensorMeta(const TensorMeta&) = default; - // TensorMeta(TensorMeta&&) = default; - - TensorMeta(TensorMeta&& meta) - : dims(meta.dims), - backend(meta.backend), - type(meta.type), - layout(meta.layout), - numel(meta.numel), - offset(meta.offset), - lod(meta.lod) {} - - // Compatible Contructor - TensorMeta(const DDim& dims, - Backend backend, - DataType type, - DataLayout layout, - size_t offset = 0UL, - const LoD& lod = {}) - : dims(dims), - backend(backend), - type(type), - layout(layout), - offset(offset), - lod(lod) { - int64_t init_numel = paddle::framework::product(dims); - if (init_numel >= 0) { - numel = init_numel; - } - } - - virtual ~TensorMeta() = default; +using LoD = std::vector>; +/// \brief The meta data of dense tensor. Take the structure type +/// and use all default operations. +/// +struct DenseTensorMeta { + using DataType = paddle::experimental::DataType; + using DataLayout = paddle::experimental::DataLayout; + + DenseTensorMeta() = default; + DenseTensorMeta(DataType type, const DDim& dims); + DenseTensorMeta(DataType type, const DDim& dims, DataLayout layout); + DenseTensorMeta(DataType type, + const DDim& dims, + DataLayout layout, + const std::vector>& lod); + + /// \brief Test whether the metadata is valid. Does not throw exceptions. + /// \return Whether the metadata is valid. + bool valid() const noexcept; + + /// During the entire life cycle of a DenseTensor, the following attributes + /// marked with `const` are expected to remain unchanged. + const bool is_scalar{false}; DDim dims; - - Backend backend{Backend::CPU}; - DataType type{DataType::FLOAT32}; - DataLayout layout{DataLayout::NCHW}; - - /** - * [ Why not calculate numel based on dims? ] - * - * Tensor may be 0-dimensional, but 0-dimensional Tensor may have values. - * For example: - * - * import paddle - * - * a = paddle.to_tensor([1, 2, 3]) - * print(a[0].shape) # expected: [] - * print(a[0].numel()) # expected: 1 - * - * Now Paddle can not get expected result above, because the old Tensor's - * numel is calculated based on dims. - */ - int64_t numel{1}; - - size_t offset{0}; - - /** - * [ Why basic TensorMeta hold LoD? ] - * - * LoDTensor is still the main Tensor concept in Paddle. - * Although only a small number of ops need to use LoD information, - * LoD may need to be passed between Op's input and output, which is - * difficult to remove in a short time. - * - * But we don't want to add a Tensor type because of LoD, which makes - * the concept complicated, so LoD is a member held by Tensor by default. 
- */ + const DataType type{DataType::FLOAT32}; + const DataLayout layout{DataLayout::NCHW}; LoD lod; }; +inline DenseTensorMeta::DenseTensorMeta(DataType type, const DDim& dims) + : dims(dims), type(type) {} + +inline DenseTensorMeta::DenseTensorMeta(DataType type, + const DDim& dims, + DataLayout layout) + : dims(dims), type(type), layout(layout) {} + +inline DenseTensorMeta::DenseTensorMeta( + DataType type, + const DDim& dims, + DataLayout layout, + const std::vector>& lod) + : dims(dims), type(type), layout(layout), lod(lod) {} + +inline bool DenseTensorMeta::valid() const noexcept { + bool valid{true}; + valid = valid && (type != DataType::UNDEFINED); + valid = valid && (layout != DataLayout::UNDEFINED); + valid = valid && (is_scalar || product(dims)); + return valid; +} + } // namespace pten diff --git a/paddle/pten/hapi/CMakeLists.txt b/paddle/pten/hapi/CMakeLists.txt index 8a33de85bddd3..4b427b3b4a383 100644 --- a/paddle/pten/hapi/CMakeLists.txt +++ b/paddle/pten/hapi/CMakeLists.txt @@ -1,3 +1,3 @@ add_subdirectory(lib) -cc_library(pten_hapi SRCS all.cc DEPS math_api linalg_api creation_api) +cc_library(pten_hapi SRCS all.cc DEPS linalg_api math_api creation_api) diff --git a/paddle/pten/hapi/lib/creation.cc b/paddle/pten/hapi/lib/creation.cc index 5048b983b122f..cda8d24b5e6ad 100644 --- a/paddle/pten/hapi/lib/creation.cc +++ b/paddle/pten/hapi/lib/creation.cc @@ -21,6 +21,7 @@ limitations under the License. */ #include "paddle/pten/api/include/core.h" #include "paddle/pten/api/include/infershape.h" #include "paddle/pten/hapi/lib/kernel_dispatch.h" +#include "paddle/pten/hapi/lib/utils/allocator.h" namespace paddle { namespace experimental { @@ -50,10 +51,12 @@ Tensor full_like(const Tensor& x, Tensor out; // InferDataType if (dtype != pten::DataType::UNDEFINED) { - out_meta.type = dtype; + const_cast(out_meta.type) = dtype; } - auto dense_out = - std::make_shared(out_meta, pten::TensorStatus()); + const auto allocator = + std::make_shared( + pten::TransToFluidPlace(kernel_key.backend())); + auto dense_out = std::make_shared(allocator, out_meta); kernel_context.EmplaceBackOutput(dense_out); out.set_impl(dense_out); diff --git a/paddle/pten/hapi/lib/linalg.cc b/paddle/pten/hapi/lib/linalg.cc index 1269702f28f91..54829feb43a24 100644 --- a/paddle/pten/hapi/lib/linalg.cc +++ b/paddle/pten/hapi/lib/linalg.cc @@ -24,6 +24,7 @@ limitations under the License. */ #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/kernel_context.h" #include "paddle/pten/hapi/lib/kernel_dispatch.h" +#include "paddle/pten/hapi/lib/utils/allocator.h" #include "paddle/pten/infershape/binary.h" namespace paddle { @@ -52,8 +53,9 @@ Tensor dot(const Tensor& x, const Tensor& y) { // 5. Prepare outputs Tensor out; - auto dense_out = - std::make_shared(out_meta, pten::TensorStatus()); + const auto allocator = std::make_shared( + pten::TransToFluidPlace(kernel_key.backend())); + auto dense_out = std::make_shared(allocator, out_meta); kernel_context.EmplaceBackOutput(dense_out); out.set_impl(dense_out); diff --git a/paddle/pten/hapi/lib/manipulation.cc b/paddle/pten/hapi/lib/manipulation.cc index 4b9b66b9df0bd..fa60bac6d1aed 100644 --- a/paddle/pten/hapi/lib/manipulation.cc +++ b/paddle/pten/hapi/lib/manipulation.cc @@ -19,6 +19,7 @@ limitations under the License. 
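For orientation, a minimal sketch of the construction pattern the call sites above switch to: a pten::DenseTensor is built from an allocator plus a DenseTensorMeta instead of the old TensorMeta/TensorStatus pair. The allocator type name (paddle::experimental::DefaultAllocator) is an assumption taken from hapi/lib/utils/allocator.h, since the template arguments are elided in the hunks above; everything else mirrors the tests at the end of this patch.

    #include <memory>

    #include "glog/logging.h"
    #include "paddle/pten/core/dense_tensor.h"
    #include "paddle/pten/hapi/lib/utils/allocator.h"

    void DenseTensorConstructionSketch() {
      // Allocator bound to a concrete place (CPU here, purely as an example).
      const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
          paddle::platform::CPUPlace());

      // Meta carries dtype, dims and layout; lod is optional and empty by default.
      pten::DenseTensorMeta meta(pten::DataType::FLOAT32,
                                 paddle::framework::make_ddim({3, 4}),
                                 pten::DataLayout::NCHW);

      pten::DenseTensor dense(alloc, meta);
      CHECK(dense.valid());                       // dtype and layout defined, dims consistent
      float* data = dense.mutable_data<float>();  // storage is allocated lazily here
      data[0] = 1.0f;
    }
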
*/ #include "glog/logging.h" #include "paddle/pten/api/include/core.h" #include "paddle/pten/hapi/lib/kernel_dispatch.h" +#include "paddle/pten/hapi/lib/utils/allocator.h" #include "paddle/pten/infershape/unary.h" namespace paddle { @@ -46,8 +47,9 @@ Tensor flatten(const Tensor& x, int start_axis, int stop_axis) { // 5. Prepare outputs Tensor out; - auto dense_out = - std::make_shared(out_meta, pten::TensorStatus()); + const auto allocator = std::make_shared( + pten::TransToFluidPlace(kernel_key.backend())); + auto dense_out = std::make_shared(allocator, out_meta); kernel_context.EmplaceBackOutput(dense_out); out.set_impl(dense_out); diff --git a/paddle/pten/hapi/lib/math.cc b/paddle/pten/hapi/lib/math.cc index 851a9bc155cdd..5e4e96d333030 100644 --- a/paddle/pten/hapi/lib/math.cc +++ b/paddle/pten/hapi/lib/math.cc @@ -21,6 +21,7 @@ limitations under the License. */ #include "paddle/pten/api/include/core.h" #include "paddle/pten/api/include/infershape.h" #include "paddle/pten/hapi/lib/kernel_dispatch.h" +#include "paddle/pten/hapi/lib/utils/allocator.h" #include "paddle/pten/infershape/unary.h" namespace paddle { @@ -46,8 +47,10 @@ Tensor mean(const Tensor& x) { // 5. Prepare outputs Tensor out; - auto dense_out = - std::make_shared(out_meta, pten::TensorStatus()); + const auto allocator = + std::make_shared( + pten::TransToFluidPlace(kernel_key.backend())); + auto dense_out = std::make_shared(allocator, out_meta); kernel_context.EmplaceBackOutput(dense_out); out.set_impl(dense_out); diff --git a/paddle/pten/hapi/lib/utils/CMakeLists.txt b/paddle/pten/hapi/lib/utils/CMakeLists.txt index 4ab33a10dcdc4..c89ef812846ad 100644 --- a/paddle/pten/hapi/lib/utils/CMakeLists.txt +++ b/paddle/pten/hapi/lib/utils/CMakeLists.txt @@ -1,3 +1,4 @@ add_subdirectory(tests) -cc_library(pten_hapi_utils SRCS allocator.cc storage tensor_utils DEPS tensor_base pten_dense_tensor pten_utils) +cc_library(pten_hapi_utils SRCS allocator.cc storage.cc tensor_utils.cc DEPS tensor_base convert_utils +dense_tensor lod_tensor selected_rows place var_type_traits) diff --git a/paddle/pten/hapi/lib/utils/tensor_utils.cc b/paddle/pten/hapi/lib/utils/tensor_utils.cc index be7feebe8c206..2fb39852702c2 100644 --- a/paddle/pten/hapi/lib/utils/tensor_utils.cc +++ b/paddle/pten/hapi/lib/utils/tensor_utils.cc @@ -15,5 +15,113 @@ limitations under the License. 
*/ #include "paddle/pten/hapi/lib/utils/tensor_utils.h" namespace paddle { -namespace experimental {} // namespace experimental +namespace experimental { + +template +void SetLoD(DstLoD* dst, const SrcLoD& src) { + dst->reserve(src.size()); + dst->clear(); + for (auto&& v : src) { + dst->emplace_back(v); + } +} + +std::unique_ptr MakePtenDenseTensor( + const paddle::framework::Tensor& src) { + pten::DenseTensorMeta meta{pten::TransToPtenDataType(src.type()), + src.dims(), + pten::TransToPtenDataLayout(src.layout())}; + auto shared_storage = pten::make_intrusive(src.Holder()); + return std::make_unique(std::move(shared_storage), + std::move(meta)); +} + +std::unique_ptr MakePtenDenseTensor( + const paddle::framework::LoDTensor& src) { + pten::DenseTensorMeta meta{pten::TransToPtenDataType(src.type()), + src.dims(), + pten::TransToPtenDataLayout(src.layout())}; + SetLoD(&meta.lod, src.lod()); + auto shared_storage = pten::make_intrusive(src.Holder()); + return std::make_unique(std::move(shared_storage), + std::move(meta)); +} + +std::unique_ptr MakePtenTensorBaseFromVar( + const framework::Variable& variable, const pten::TensorArgDef& arg_def) { + auto expected_place = pten::TransToFluidPlace(arg_def.backend); + + if (variable.IsType()) { + const auto& tensor = variable.Get(); + if (!platform::is_same_place(tensor.place(), expected_place)) { + framework::LoDTensor tmp_tensor; + framework::TensorCopySync(tensor, expected_place, &tmp_tensor); + return MakePtenDenseTensor(tmp_tensor); + } else { + return MakePtenDenseTensor(tensor); + } + } else if (variable.IsType()) { + // TODO(chenweihang): now we don't deal with row and height + // by xiaowei's advice + const auto& tensor = variable.Get(); + if (!platform::is_same_place(tensor.value().place(), expected_place)) { + framework::Tensor tmp_tensor; + TensorCopySync(tensor.value(), expected_place, &tmp_tensor); + // TODO(chenweihang): adapt SelectedRows by xiaowei's design + return MakePtenDenseTensor(tmp_tensor); + } else { + return MakePtenDenseTensor(tensor.value()); + } + } else { + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported shared input `%s` type now when call pt kernel.", + framework::ToTypeName(variable.Type()))); + } + return {}; +} + +std::unique_ptr MakePtenTensorBaseFromVar( + framework::Variable* variable, const pten::TensorArgDef& arg_def) { + // mutable_data before run kernel, to avoid share output form + // KernelContext to original tensor + if (variable->template IsType()) { + auto* tensor = variable->template GetMutable(); + tensor->mutable_data(pten::TransToFluidPlace(arg_def.backend), + pten::TransToProtoVarType(arg_def.dtype)); + return MakePtenDenseTensor(*tensor); + } else if (variable->template IsType()) { + auto* tensor = variable->template GetMutable(); + tensor->mutable_value()->mutable_data( + pten::TransToFluidPlace(arg_def.backend), + pten::TransToProtoVarType(arg_def.dtype)); + // TODO(chenweihang): adapt SelectedRows by xiaowei's design, + // here the row and height will lost in output! 
+ return MakePtenDenseTensor(tensor->value()); + } else { + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported shared output `%s` type now when call pt kernel.", + framework::ToTypeName(variable->Type()))); + } + return {}; +} + +void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) { + CHECK(src); + CHECK(dst); + dst->Resize(src->dims()); + auto storage = src->release(); + CHECK(storage->OwnsMemory()); + std::shared_ptr holder( + new TensorStorage(std::move(storage))); + dst->ResetHolderWithType(holder, pten::TransToProtoVarType(src->data_type())); +} + +void MovesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst) { + CHECK(src); + CHECK(dst); + SetLoD(dst->mutable_lod(), src->lod()); + MovesStorage(src, static_cast(dst)); +} + +} // namespace experimental } // namespace paddle diff --git a/paddle/pten/hapi/lib/utils/tensor_utils.h b/paddle/pten/hapi/lib/utils/tensor_utils.h index c9d2f8ca32963..a2b2688362a4c 100644 --- a/paddle/pten/hapi/lib/utils/tensor_utils.h +++ b/paddle/pten/hapi/lib/utils/tensor_utils.h @@ -17,64 +17,32 @@ limitations under the License. */ #include #include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/variable.h" -#include "paddle/pten/core/candidate/dense_tensor.h" #include "paddle/pten/core/convert_utils.h" +#include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/core/kernel_factory.h" #include "paddle/pten/hapi/lib/utils/allocator.h" #include "paddle/pten/hapi/lib/utils/storage.h" namespace paddle { namespace experimental { -using namespace pten::candidate; // NOLINT +std::unique_ptr MakePtenDenseTensor( + const paddle::framework::Tensor& src); -template -void SetLoD(DstLoD* dst, const SrcLoD& src) { - dst->reserve(src.size()); - dst->clear(); - for (auto&& v : src) { - dst->emplace_back(v); - } -} +std::unique_ptr MakePtenDenseTensor( + const paddle::framework::LoDTensor& src); -std::shared_ptr MakeSharedDenseTensor( - const paddle::framework::Tensor& src) { - DenseTensorMeta meta{pten::TransToPtenDataType(src.type()), - src.dims(), - pten::TransToPtenDataLayout(src.layout())}; - auto shared_storage = pten::make_intrusive(src.Holder()); - return std::make_shared(std::move(shared_storage), - std::move(meta)); -} +std::unique_ptr MakePtenTensorBaseFromVar( + const framework::Variable& variable, const pten::TensorArgDef& arg_def); -std::shared_ptr MakeSharedDenseTensor( - const paddle::framework::LoDTensor& src) { - DenseTensorMeta meta{pten::TransToPtenDataType(src.type()), - src.dims(), - pten::TransToPtenDataLayout(src.layout())}; - SetLoD(&meta.lod, src.lod()); - auto shared_storage = pten::make_intrusive(src.Holder()); - return std::make_shared(std::move(shared_storage), - std::move(meta)); -} +std::unique_ptr MakePtenTensorBaseFromVar( + framework::Variable* variable, const pten::TensorArgDef& arg_def); -void MovesStorage(DenseTensor* src, paddle::framework::Tensor* dst) { - CHECK(src); - CHECK(dst); - dst->Resize(src->dims()); - auto storage = src->release(); - CHECK(storage->OwnsMemory()); - std::shared_ptr holder( - new TensorStorage(std::move(storage))); - dst->ResetHolderWithType(holder, pten::TransToProtoVarType(src->data_type())); -} +void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst); -void MovesStorage(DenseTensor* src, paddle::framework::LoDTensor* dst) { - CHECK(src); - CHECK(dst); - SetLoD(dst->mutable_lod(), src->lod()); - MovesStorage(src, static_cast(dst)); -} +void MovesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst); } 
// namespace experimental } // namespace paddle diff --git a/paddle/pten/hapi/lib/utils/tests/test_tensor_utils.cc b/paddle/pten/hapi/lib/utils/tests/test_tensor_utils.cc index f45537508d29a..56184eec70f26 100644 --- a/paddle/pten/hapi/lib/utils/tests/test_tensor_utils.cc +++ b/paddle/pten/hapi/lib/utils/tests/test_tensor_utils.cc @@ -24,8 +24,8 @@ using DDim = paddle::framework::DDim; using DataType = paddle::experimental::DataType; using DataLayout = paddle::experimental::DataLayout; -using DenseTensor = pten::candidate::DenseTensor; -using DenseTensorMeta = pten::candidate::DenseTensorMeta; +using DenseTensor = pten::DenseTensor; +using DenseTensorMeta = pten::DenseTensorMeta; TEST(tensor_utils, dense_tensor_to_lod_tensor) { const DDim dims({2, 1}); @@ -56,7 +56,7 @@ TEST(tensor_utils, dense_tensor_to_lod_tensor) { CHECK(lod_tensor.data()[0] == 1.0f); CHECK(lod_tensor.data()[1] == 2.1f); - auto dense_tensor_1 = MakeSharedDenseTensor(lod_tensor); + auto dense_tensor_1 = MakePtenDenseTensor(lod_tensor); CHECK(dense_tensor_1->dims() == dims); CHECK(dense_tensor_1->data_type() == dtype); CHECK(dense_tensor_1->layout() == layout); @@ -90,7 +90,7 @@ TEST(tensor_utils, dense_tensor_to_tensor) { CHECK(tensor.data()[0] == 1.0f); CHECK(tensor.data()[1] == 2.1f); - auto dense_tensor_1 = MakeSharedDenseTensor(tensor); + auto dense_tensor_1 = MakePtenDenseTensor(tensor); CHECK(dense_tensor_1->dims() == dims); CHECK(dense_tensor_1->data_type() == dtype); CHECK(dense_tensor_1->layout() == layout); @@ -99,6 +99,27 @@ TEST(tensor_utils, dense_tensor_to_tensor) { CHECK(data_1[1] == 2.1f); } +TEST(PtenUtils, VarToPtTensor) { + // 1. create Variable + paddle::framework::Variable v; + auto selected_rows = v.GetMutable(); + paddle::framework::Tensor* value = selected_rows->mutable_value(); + auto* data = value->mutable_data(paddle::framework::make_ddim({1, 1}), + paddle::platform::CPUPlace()); + data[0] = 123; + pten::Backend expect_backend = pten::Backend::CPU; + +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + expect_backend = pten::Backend::CUDA; +#endif + auto tensor_def = pten::TensorArgDef( + expect_backend, pten::DataLayout::NCHW, pten::DataType::INT32); + // 2. test API + auto tensor_x = MakePtenTensorBaseFromVar(v, tensor_def); + // 3. check result + ASSERT_EQ(tensor_x->data_type(), pten::DataType::INT32); +} + } // namespace tests } // namespace experimental } // namespace paddle diff --git a/paddle/pten/infershape/binary.cc b/paddle/pten/infershape/binary.cc index 7d224835cc05a..c2b88c74d847e 100644 --- a/paddle/pten/infershape/binary.cc +++ b/paddle/pten/infershape/binary.cc @@ -17,7 +17,8 @@ limitations under the License. 
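Condensed from the test_tensor_utils cases above, the round trip between the fluid and pten tensor types is zero-copy in both directions: MovesStorage releases the DenseTensor's intrusive storage and re-wraps it as the LoDTensor's holder, while MakePtenDenseTensor wraps the existing holder again through the shared storage type from hapi/lib/utils/storage.h. A sketch, with the DefaultAllocator name assumed as noted earlier:

    #include <memory>

    #include "paddle/pten/hapi/lib/utils/tensor_utils.h"

    void RoundTripSketch() {
      const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
          paddle::platform::CPUPlace());
      pten::DenseTensorMeta meta(pten::DataType::FLOAT32,
                                 paddle::framework::make_ddim({2, 1}),
                                 pten::DataLayout::NCHW,
                                 /*lod=*/{{0, 2}});

      auto dense = std::make_unique<pten::DenseTensor>(alloc, meta);
      dense->mutable_data<float>()[0] = 1.0f;

      // DenseTensor -> LoDTensor: dims, lod and the storage holder move over, no copy.
      paddle::framework::LoDTensor lod_tensor;
      paddle::experimental::MovesStorage(dense.get(), &lod_tensor);

      // LoDTensor -> DenseTensor: shares the same holder, again without copying.
      auto dense_again = paddle::experimental::MakePtenDenseTensor(lod_tensor);
    }
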
*/ namespace pten { -TensorMeta DotInferShape(const TensorMeta& x_meta, const TensorMeta& y_meta) { +DenseTensorMeta DotInferShape(const DenseTensorMeta& x_meta, + const DenseTensorMeta& y_meta) { auto x_dims = x_meta.dims; auto x_rank = static_cast(x_dims.size()); PADDLE_ENFORCE_EQ(true, @@ -54,8 +55,7 @@ TensorMeta DotInferShape(const TensorMeta& x_meta, const TensorMeta& y_meta) { y_dims.to_str())); x_dims[x_dims.size() - 1] = 1; - TensorMeta return_meta( - x_dims, x_meta.backend, x_meta.type, x_meta.layout, x_meta.offset); + DenseTensorMeta return_meta(x_meta.type, x_dims, x_meta.layout); return return_meta; } diff --git a/paddle/pten/infershape/binary.h b/paddle/pten/infershape/binary.h index 8e44b520e0a9f..613d2f66a6edd 100644 --- a/paddle/pten/infershape/binary.h +++ b/paddle/pten/infershape/binary.h @@ -21,15 +21,19 @@ namespace pten { // Common InferShape Functions for binary operators, The format like: // -// 1. TensorMeta [OpName]InferShape(const TensorMeta& x_meta, ...) {} -// 2. std::pair [OpName]InferShape(const TensorMeta& +// 1. DenseTensorMeta [OpName]InferShape(const DenseTensorMeta& x_meta, ...) +// {} +// 2. std::pair [OpName]InferShape(const +// DenseTensorMeta& // x_meta, ...) {} -// 3. std::tuple [OpName]InferShape(const -// TensorMeta& x_meta, ...) +// 3. std::tuple +// [OpName]InferShape(const +// DenseTensorMeta& x_meta, ...) // NOTE: The name "InferShape" may be not appropriate. "InferMeta" may be good. // Because functions in this file // not only can infer shape, but alse need infer lod or other useful data. -TensorMeta DotInferShape(const TensorMeta& x_meta, const TensorMeta& y_meta); +DenseTensorMeta DotInferShape(const DenseTensorMeta& x_meta, + const DenseTensorMeta& y_meta); } // namespace pten diff --git a/paddle/pten/infershape/unary.cc b/paddle/pten/infershape/unary.cc index 57e74345b7d42..4e743261b5906 100644 --- a/paddle/pten/infershape/unary.cc +++ b/paddle/pten/infershape/unary.cc @@ -17,18 +17,19 @@ limitations under the License. 
*/ namespace pten { -TensorMeta UnchangedInferShape(const TensorMeta& x_meta) { return x_meta; } +DenseTensorMeta UnchangedInferShape(const DenseTensorMeta& x_meta) { + return x_meta; +} -TensorMeta ReductionInferShape(const TensorMeta& x_meta) { +DenseTensorMeta ReductionInferShape(const DenseTensorMeta& x_meta) { const auto& out_dims = paddle::framework::make_ddim({1}); - TensorMeta return_meta( - out_dims, x_meta.backend, x_meta.type, x_meta.layout, x_meta.offset); + DenseTensorMeta return_meta(x_meta.type, out_dims, x_meta.layout); return return_meta; } -TensorMeta FlattenInferShape(const TensorMeta& x_meta, - int start_axis, - int stop_axis) { +DenseTensorMeta FlattenInferShape(const DenseTensorMeta& x_meta, + int start_axis, + int stop_axis) { auto& x_dims = x_meta.dims; int in_dims_size = x_dims.size(); if (start_axis < 0) { @@ -62,8 +63,7 @@ TensorMeta FlattenInferShape(const TensorMeta& x_meta, out_shape.push_back(x_dims[i]); } const auto& out_dims = paddle::framework::make_ddim(out_shape); - TensorMeta return_meta( - out_dims, x_meta.backend, x_meta.type, x_meta.layout, x_meta.offset); + DenseTensorMeta return_meta(x_meta.type, out_dims, x_meta.layout); if (x_dims[0] == return_meta.dims[0]) { // Only pass LoD when the first dimension of output and Input(X) diff --git a/paddle/pten/infershape/unary.h b/paddle/pten/infershape/unary.h index 1d8fac05d0eaa..1db0b094eba3a 100644 --- a/paddle/pten/infershape/unary.h +++ b/paddle/pten/infershape/unary.h @@ -21,21 +21,24 @@ namespace pten { // Common InferShape Functions for unary operators, The format like: // -// 1. TensorMeta [OpName]InferShape(const TensorMeta& x_meta, ...) {} -// 2. std::pair [OpName]InferShape(const TensorMeta& +// 1. DenseTensorMeta [OpName]InferShape(const DenseTensorMeta& x_meta, ...) +// {} +// 2. std::pair [OpName]InferShape(const +// DenseTensorMeta& // x_meta, ...) {} -// 3. std::tuple [OpName]InferShape(const -// TensorMeta& x_meta, ...) +// 3. std::tuple +// [OpName]InferShape(const +// DenseTensorMeta& x_meta, ...) // NOTE: The name "InferShape" may be not appropriate. "InferMeta" may be good. // Because functions in this file // not only can infer shape, but alse need infer lod or other useful data. 
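Per the NOTE above, these helpers only map metas to metas; applying the result is left to the kernel. A short sketch of the intended kernel-side usage, mirroring pten::Flatten in kernels/cpu/manipulation.cc further below (the helper and variable names here are illustrative only):

    #include "paddle/pten/core/dense_tensor.h"
    #include "paddle/pten/infershape/unary.h"

    // Illustrative helper: derive the output meta and apply it to `out`.
    void ApplyFlattenMeta(const pten::DenseTensor& x,
                          int start_axis,
                          int stop_axis,
                          pten::DenseTensor* out) {
      pten::DenseTensorMeta out_meta =
          pten::FlattenInferShape(x.meta(), start_axis, stop_axis);
      out->Resize(out_meta.dims);   // only the meta changes; memory is allocated later
      out->set_lod(out_meta.lod);   // lod is forwarded when the first dimension is kept
    }
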
-TensorMeta UnchangedInferShape(const TensorMeta& x_meta); +DenseTensorMeta UnchangedInferShape(const DenseTensorMeta& x_meta); -TensorMeta ReductionInferShape(const TensorMeta& x_meta); +DenseTensorMeta ReductionInferShape(const DenseTensorMeta& x_meta); -TensorMeta FlattenInferShape(const TensorMeta& x_meta, - int start_axis, - int stop_axis); +DenseTensorMeta FlattenInferShape(const DenseTensorMeta& x_meta, + int start_axis, + int stop_axis); } // namespace pten diff --git a/paddle/pten/kernels/cpu/CMakeLists.txt b/paddle/pten/kernels/cpu/CMakeLists.txt index ad18a2f555265..2c4a424e48492 100644 --- a/paddle/pten/kernels/cpu/CMakeLists.txt +++ b/paddle/pten/kernels/cpu/CMakeLists.txt @@ -1,5 +1,5 @@ cc_library(math_cpu SRCS math.cc DEPS dense_tensor kernel_context kernel_factory eigen_function) cc_library(linalg_cpu SRCS linalg.cc DEPS dense_tensor kernel_context kernel_factory) cc_library(creation_cpu SRCS creation.cc DEPS dense_tensor kernel_context kernel_factory eigen_function) -cc_library(utils_cpu SRCS utils.cc DEPS dense_tensor kernel_context kernel_factory memory) +cc_library(utils_cpu SRCS utils.cc DEPS dense_tensor kernel_context kernel_factory memory convert_utils) cc_library(manipulation_cpu SRCS manipulation.cc DEPS dense_tensor kernel_context kernel_factory utils_cpu unary) diff --git a/paddle/pten/kernels/cpu/manipulation.cc b/paddle/pten/kernels/cpu/manipulation.cc index ac7a8eaba8cf5..c436e14e0caab 100644 --- a/paddle/pten/kernels/cpu/manipulation.cc +++ b/paddle/pten/kernels/cpu/manipulation.cc @@ -26,7 +26,7 @@ void Flatten(const CPUContext& dev_ctx, DenseTensor* out) { auto out_meta = FlattenInferShape(x.meta(), start_axis, stop_axis); pten::Copy(dev_ctx, x, out); - out->mutable_meta()->lod = out_meta.lod; + out->set_lod(out_meta.lod); out->Resize(out_meta.dims); } @@ -47,8 +47,8 @@ void FlattenWithXShape(const CPUContext& dev_ctx, for (int i = 0; i < in_dims.size(); ++i) { xshape_dims[i + 1] = in_dims[i]; } - xshape->mutable_meta()->dims = paddle::framework::make_ddim(xshape_dims); - xshape->mutable_meta()->lod = x.meta().lod; + xshape->Resize(paddle::framework::make_ddim(xshape_dims)); + xshape->set_lod(x.lod()); } } // namespace pten diff --git a/paddle/pten/kernels/cpu/utils.cc b/paddle/pten/kernels/cpu/utils.cc index b17b6512178d0..1f9d675deafa2 100644 --- a/paddle/pten/kernels/cpu/utils.cc +++ b/paddle/pten/kernels/cpu/utils.cc @@ -24,7 +24,6 @@ void Copy(const CPUContext& dev_ctx, const DenseTensor& src, DenseTensor* dst) { auto* dst_ptr = dst->mutable_data(); const auto& src_place = src.place(); const auto& dst_place = dst->place(); - src.CheckMemorySize(); if (src_ptr == dst_ptr && src_place == dst_place) { VLOG(3) << "Skip copy the same data async from " << src_place << " to " @@ -36,7 +35,7 @@ void Copy(const CPUContext& dev_ctx, const DenseTensor& src, DenseTensor* dst) { VLOG(3) << "TensorCopy " << src.dims() << " from " << src.place() << " to " << dst_place; dst->Resize(src.dims()); - dst->mutable_meta()->layout = src.meta().layout; + CHECK(dst->layout() == src.layout()); auto size = src.numel() * paddle::framework::SizeOfType( TransToProtoVarType(src.data_type())); diff --git a/paddle/pten/kernels/cuda/CMakeLists.txt b/paddle/pten/kernels/cuda/CMakeLists.txt index 54df37ecb5e26..9e86d9521c99a 100644 --- a/paddle/pten/kernels/cuda/CMakeLists.txt +++ b/paddle/pten/kernels/cuda/CMakeLists.txt @@ -2,12 +2,12 @@ if(WITH_GPU) nv_library(math_cuda SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory) nv_library(linalg_cuda 
SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory) nv_library(creation_cuda SRCS creation.cu DEPS eigen_function dense_tensor kernel_context kernel_factory) - nv_library(utils_cuda SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory) + nv_library(utils_cuda SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils) nv_library(manipulation_cuda SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_cuda unary) elseif(WITH_ROCM) hip_library(math_cuda SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory) hip_library(linalg_cuda SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory) hip_library(creation_cuda SRCS creation.cu DEPS eigen_function dense_tensor kernel_context kernel_factory) - hip_library(utils_cuda SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory) + hip_library(utils_cuda SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils) hip_library(manipulation_cuda SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_cuda unary) endif() diff --git a/paddle/pten/kernels/cuda/manipulation.cu b/paddle/pten/kernels/cuda/manipulation.cu index 13bc109faaba3..43614f859c58b 100644 --- a/paddle/pten/kernels/cuda/manipulation.cu +++ b/paddle/pten/kernels/cuda/manipulation.cu @@ -26,7 +26,7 @@ void Flatten(const CUDAContext& dev_ctx, DenseTensor* out) { auto out_meta = FlattenInferShape(x.meta(), start_axis, stop_axis); pten::Copy(dev_ctx, x, out); - out->mutable_meta()->lod = out_meta.lod; + out->set_lod(out_meta.lod); out->Resize(out_meta.dims); } @@ -47,8 +47,8 @@ void FlattenWithXShape(const CUDAContext& dev_ctx, for (int i = 0; i < in_dims.size(); ++i) { xshape_dims[i + 1] = in_dims[i]; } - xshape->mutable_meta()->dims = paddle::framework::make_ddim(xshape_dims); - xshape->mutable_meta()->lod = x.meta().lod; + xshape->Resize(paddle::framework::make_ddim(xshape_dims)); + xshape->set_lod(x.lod()); } } // namespace pten diff --git a/paddle/pten/kernels/cuda/math.cu b/paddle/pten/kernels/cuda/math.cu index 4ebe58629545e..1f2a34ea505c2 100644 --- a/paddle/pten/kernels/cuda/math.cu +++ b/paddle/pten/kernels/cuda/math.cu @@ -30,6 +30,7 @@ namespace cub = hipcub; #include "paddle/fluid/platform/float16.h" #include "paddle/pten/core/convert_utils.h" #include "paddle/pten/core/kernel_registry.h" +#include "paddle/pten/hapi/lib/utils/tensor_utils.h" namespace pten { @@ -75,16 +76,21 @@ void Mean(const CUDAContext& dev_ctx, const DenseTensor& x, DenseTensor* out) { nullptr, temp_storage_bytes, trans_x, out_data, size_prob, stream); PADDLE_ENFORCE_CUDA_SUCCESS(err); + const auto alloc = std::make_shared( + dev_ctx.GetPlace()); pten::DenseTensor tmp( - TensorMeta(paddle::framework::make_ddim( - {static_cast(temp_storage_bytes)}), - pten::TransToPtenBackend(dev_ctx.GetPlace()), - x.data_type(), - x.layout()), - TensorStatus()); - auto* temp_storage = tmp.mutable_data(); - err = cub::DeviceReduce::Sum( - temp_storage, temp_storage_bytes, trans_x, out_data, size_prob, stream); + alloc, + DenseTensorMeta(x.data_type(), + paddle::framework::make_ddim( + {static_cast(temp_storage_bytes)}), + x.layout())); + void* temp_storage = tmp.mutable_data(); + err = cub::DeviceReduce::Sum(static_cast(temp_storage), + temp_storage_bytes, + trans_x, + out_data, + size_prob, + stream); PADDLE_ENFORCE_CUDA_SUCCESS(err); } diff --git a/paddle/pten/kernels/cuda/utils.cu b/paddle/pten/kernels/cuda/utils.cu index 
74e070880e106..e81e00a5873f7 100644 --- a/paddle/pten/kernels/cuda/utils.cu +++ b/paddle/pten/kernels/cuda/utils.cu @@ -27,7 +27,6 @@ void Copy(const CUDAContext& dev_ctx, auto* dst_ptr = dst->mutable_data(); const auto& src_place = src.place(); const auto& dst_place = dst->place(); - src.CheckMemorySize(); if (src_ptr == dst_ptr && src_place == dst_place) { VLOG(3) << "Skip copy the same data async from " << src_place << " to " @@ -39,7 +38,7 @@ void Copy(const CUDAContext& dev_ctx, VLOG(3) << "TensorCopy " << src.dims() << " from " << src.place() << " to " << dst_place; dst->Resize(src.dims()); - dst->mutable_meta()->layout = src.meta().layout; + CHECK(dst->layout() == src.layout()); auto size = src.numel() * paddle::framework::SizeOfType( TransToProtoVarType(src.data_type())); diff --git a/paddle/pten/kernels/functions/eigen/dot.h b/paddle/pten/kernels/functions/eigen/dot.h index 605517bad6a9a..300da4ae1f13b 100644 --- a/paddle/pten/kernels/functions/eigen/dot.h +++ b/paddle/pten/kernels/functions/eigen/dot.h @@ -28,7 +28,6 @@ void Dot(const DevCtx& dev_ctx, const DenseTensor& x, const DenseTensor& y, DenseTensor* out) { - out->mutable_data(); if (1 == out->dims().size()) { auto eigen_out = pten::EigenScalar::From(*out); auto eigen_x = pten::EigenVector::Flatten(x); diff --git a/paddle/pten/kernels/functions/eigen/mean.h b/paddle/pten/kernels/functions/eigen/mean.h index 574a1957ae558..ee4bf1653f23a 100644 --- a/paddle/pten/kernels/functions/eigen/mean.h +++ b/paddle/pten/kernels/functions/eigen/mean.h @@ -25,8 +25,6 @@ namespace eigen { template void Mean(const DevCtx& dev_ctx, const DenseTensor& x, DenseTensor* out) { - out->mutable_data(); - // TODO(chenweihang): if we design new tensor, we should support // the low-level calc functor use new tensor as input, // which may be a big project! diff --git a/paddle/pten/tests/CMakeLists.txt b/paddle/pten/tests/CMakeLists.txt index d30ac2578d00b..21ce2f74df945 100644 --- a/paddle/pten/tests/CMakeLists.txt +++ b/paddle/pten/tests/CMakeLists.txt @@ -3,8 +3,8 @@ cc_test(pten_data_layout_test SRCS data_layout_test.cc DEPS gtest) cc_test(pten_data_type_test SRCS data_type_test.cc DEPS gtest) cc_test(dense_tensor_test SRCS dense_tensor_test.cc DEPS dense_tensor) cc_test(kernel_factory_test SRCS kernel_factory_test.cc DEPS kernel_factory) -cc_test(test_mean_api SRCS test_mean_api.cc DEPS math_api) -cc_test(test_dot_api SRCS test_dot_api.cc DEPS linalg_api) -cc_test(test_fill_api SRCS test_fill_api.cc DEPS creation_api) -cc_test(test_copy_api SRCS test_copy_api.cc DEPS utils_cpu) -cc_test(test_flatten_api SRCS test_flatten_api.cc DEPS utils_cpu manipulation_api) +cc_test(test_mean_api SRCS test_mean_api.cc DEPS math_api pten_hapi_utils) +cc_test(test_dot_api SRCS test_dot_api.cc DEPS linalg_api pten_hapi_utils) +cc_test(test_fill_api SRCS test_fill_api.cc DEPS creation_api pten_hapi_utils) +cc_test(test_copy_api SRCS test_copy_api.cc DEPS utils_cpu pten_hapi_utils) +cc_test(test_flatten_api SRCS test_flatten_api.cc DEPS utils_cpu manipulation_api pten_hapi_utils) diff --git a/paddle/pten/tests/dense_tensor_test.cc b/paddle/pten/tests/dense_tensor_test.cc index 722eab17ec412..e74917263dafb 100644 --- a/paddle/pten/tests/dense_tensor_test.cc +++ b/paddle/pten/tests/dense_tensor_test.cc @@ -18,16 +18,3 @@ limitations under the License. 
*/ namespace framework = paddle::framework; using DDim = paddle::framework::DDim; - -TEST(DenseTensor, Constructor) { - pten::DenseTensor tensor(pten::TensorMeta(framework::make_ddim({5, 10}), - pten::Backend::CPU, - pten::DataType::FLOAT32, - pten::DataLayout::NCHW, - 0UL), - pten::TensorStatus()); - ASSERT_EQ(tensor.dims().size(), 2); - ASSERT_EQ(tensor.backend(), pten::Backend::CPU); - ASSERT_EQ(tensor.data_type(), pten::DataType::FLOAT32); - ASSERT_EQ(tensor.layout(), pten::DataLayout::NCHW); -} diff --git a/paddle/pten/tests/test_copy_api.cc b/paddle/pten/tests/test_copy_api.cc index 39533c73a2564..fcebe9a310dea 100644 --- a/paddle/pten/tests/test_copy_api.cc +++ b/paddle/pten/tests/test_copy_api.cc @@ -19,6 +19,7 @@ limitations under the License. */ #include "paddle/pten/kernels/cpu/utils.h" #include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/hapi/lib/utils/allocator.h" PT_DECLARE_MODULE(UtilsCPU); @@ -30,20 +31,20 @@ using DDim = paddle::framework::DDim; // 'paddle/api', TEST(API, copy) { // 1. create tensor + const auto alloc = std::make_shared( + paddle::platform::CPUPlace()); auto dense_src = std::make_shared( - pten::TensorMeta(framework::make_ddim({2, 3}), - pten::Backend::CPU, - pten::DataType::FLOAT32, - pten::DataLayout::NCHW), - pten::TensorStatus()); + alloc, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({2, 3}), + pten::DataLayout::NCHW)); auto* dense_x_data = dense_src->mutable_data(); auto dense_dst = std::make_shared( - pten::TensorMeta(framework::make_ddim({2, 3}), - pten::Backend::CPU, - pten::DataType::FLOAT32, - pten::DataLayout::NCHW), - pten::TensorStatus()); + alloc, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({2, 3}), + pten::DataLayout::NCHW)); for (size_t i = 0; i < 2; ++i) { for (size_t j = 0; j < 3; ++j) { diff --git a/paddle/pten/tests/test_dot_api.cc b/paddle/pten/tests/test_dot_api.cc index affa18469ec21..69e785904fe3c 100644 --- a/paddle/pten/tests/test_dot_api.cc +++ b/paddle/pten/tests/test_dot_api.cc @@ -19,6 +19,7 @@ limitations under the License. */ #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/kernel_registry.h" +#include "paddle/pten/hapi/lib/utils/allocator.h" PT_DECLARE_MODULE(LinalgCPU); @@ -32,20 +33,20 @@ using DDim = paddle::framework::DDim; // TODO(chenweihang): Remove this test after the API is used in the dygraph TEST(API, dot) { // 1. create tensor + const auto alloc = std::make_shared( + paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - pten::TensorMeta(framework::make_ddim({3, 10}), - pten::Backend::CPU, - pten::DataType::FLOAT32, - pten::DataLayout::NCHW), - pten::TensorStatus()); + alloc, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({3, 10}), + pten::DataLayout::NCHW)); auto* dense_x_data = dense_x->mutable_data(); auto dense_y = std::make_shared( - pten::TensorMeta(framework::make_ddim({3, 10}), - pten::Backend::CPU, - pten::DataType::FLOAT32, - pten::DataLayout::NCHW), - pten::TensorStatus()); + alloc, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({3, 10}), + pten::DataLayout::NCHW)); auto* dense_y_data = dense_y->mutable_data(); float sum[3] = {0.0, 0.0, 0.0}; diff --git a/paddle/pten/tests/test_fill_api.cc b/paddle/pten/tests/test_fill_api.cc index afb36f95e8a1e..c19d14efaa976 100644 --- a/paddle/pten/tests/test_fill_api.cc +++ b/paddle/pten/tests/test_fill_api.cc @@ -19,6 +19,7 @@ limitations under the License. 
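The dot test hunk above ends before the API call itself; as a hedged sketch (not the elided test body), this is how such prepared inputs are typically consumed through the high-level API, mirroring the Tensor wrapping and result-checking pattern visible in the fill tests below:

    // Continuing from dense_x / dense_y as prepared in TEST(API, dot) above.
    paddle::experimental::Tensor x(dense_x);
    paddle::experimental::Tensor y(dense_y);

    auto out = paddle::experimental::dot(x, y);  // output meta is {3, 1}, cf. DotInferShape

    auto dense_out = std::dynamic_pointer_cast<pten::DenseTensor>(out.impl());
    auto* result = dense_out->data<float>();     // one reduced value per row, cf. sum[3] above
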
*/ #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/kernel_registry.h" +#include "paddle/pten/hapi/lib/utils/allocator.h" PT_DECLARE_MODULE(CreationCPU); @@ -32,12 +33,14 @@ using DDim = paddle::framework::DDim; // TODO(chenweihang): Remove this test after the API is used in the dygraph TEST(API, full_like) { // 1. create tensor + // 1. create tensor + const auto alloc = std::make_shared( + paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - pten::TensorMeta(framework::make_ddim({3, 2}), - pten::Backend::CPU, - pten::DataType::FLOAT32, - pten::DataLayout::NCHW), - pten::TensorStatus()); + alloc, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({3, 2}), + pten::DataLayout::NCHW)); auto* dense_x_data = dense_x->mutable_data(); dense_x_data[0] = 0; @@ -66,12 +69,13 @@ TEST(API, full_like) { TEST(API, zeros_like) { // 1. create tensor + const auto alloc = std::make_shared( + paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - pten::TensorMeta(framework::make_ddim({3, 2}), - pten::Backend::CPU, - pten::DataType::FLOAT32, - pten::DataLayout::NCHW), - pten::TensorStatus()); + alloc, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({3, 2}), + pten::DataLayout::NCHW)); auto* dense_x_data = dense_x->mutable_data(); dense_x_data[0] = 1; @@ -98,13 +102,14 @@ TEST(API, zeros_like) { TEST(API, ones_like) { // 1. create tensor + const auto alloc = std::make_shared( + paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - pten::TensorMeta(framework::make_ddim({3, 2}), - pten::Backend::CPU, - pten::DataType::FLOAT32, - pten::DataLayout::NCHW), - pten::TensorStatus()); - auto* dense_x_data = dense_x->mutable_data(); + alloc, + pten::DenseTensorMeta(pten::DataType::INT32, + framework::make_ddim({3, 2}), + pten::DataLayout::NCHW)); + auto* dense_x_data = dense_x->mutable_data(); dense_x_data[0] = 0; paddle::experimental::Tensor x(dense_x); @@ -122,7 +127,7 @@ TEST(API, ones_like) { ASSERT_EQ(out.initialized(), true); auto dense_out = std::dynamic_pointer_cast(out.impl()); - auto* actual_result = dense_out->data(); + auto* actual_result = dense_out->data(); for (auto i = 0; i < 6; i++) { ASSERT_EQ(actual_result[i], 1); } diff --git a/paddle/pten/tests/test_flatten_api.cc b/paddle/pten/tests/test_flatten_api.cc index 7f68cd75bc8d2..48d2205c2ff48 100644 --- a/paddle/pten/tests/test_flatten_api.cc +++ b/paddle/pten/tests/test_flatten_api.cc @@ -19,6 +19,7 @@ limitations under the License. */ #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/kernel_registry.h" +#include "paddle/pten/hapi/lib/utils/allocator.h" PT_DECLARE_MODULE(ManipulationCPU); @@ -32,12 +33,13 @@ using DDim = paddle::framework::DDim; // TODO(chenweihang): Remove this test after the API is used in the dygraph TEST(API, flatten) { // 1. 
create tensor + const auto alloc = std::make_shared( + paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - pten::TensorMeta(framework::make_ddim({3, 2, 2, 3}), - pten::Backend::CPU, - pten::DataType::FLOAT32, - pten::DataLayout::NCHW), - pten::TensorStatus()); + alloc, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({3, 2, 2, 3}), + pten::DataLayout::NCHW)); auto* dense_x_data = dense_x->mutable_data(); for (int i = 0; i < dense_x->numel(); i++) { diff --git a/paddle/pten/tests/test_mean_api.cc b/paddle/pten/tests/test_mean_api.cc index 9c0472916e01d..ee8388671b7eb 100644 --- a/paddle/pten/tests/test_mean_api.cc +++ b/paddle/pten/tests/test_mean_api.cc @@ -19,6 +19,7 @@ limitations under the License. */ #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/kernel_registry.h" +#include "paddle/pten/hapi/lib/utils/allocator.h" PT_DECLARE_MODULE(MathCPU); @@ -32,12 +33,13 @@ using DDim = paddle::framework::DDim; // TODO(chenweihang): Remove this test after the API is used in the dygraph TEST(API, mean) { // 1. create tensor + const auto alloc = std::make_shared( + paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - pten::TensorMeta(framework::make_ddim({3, 4}), - pten::Backend::CPU, - pten::DataType::FLOAT32, - pten::DataLayout::NCHW), - pten::TensorStatus()); + alloc, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({3, 4}), + pten::DataLayout::NCHW)); auto* dense_x_data = dense_x->mutable_data(); float sum = 0.0;