diff --git a/paddle/fluid/framework/custom_operator.cc b/paddle/fluid/framework/custom_operator.cc
index 5f731ea9ac8f8..a18e465b98540 100644
--- a/paddle/fluid/framework/custom_operator.cc
+++ b/paddle/fluid/framework/custom_operator.cc
@@ -207,14 +207,14 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx,
                 "Tensors.",
                 vec_true_outs.size(), outs.size()));
         for (size_t j = 0; j < vec_true_outs.size(); ++j) {
-          experimental::MovesStorage(
+          experimental::MovesSharedStorage(
               std::dynamic_pointer_cast<pten::DenseTensor>(outs.at(j).impl())
                   .get(),
               vec_true_outs.at(j));
         }
       } else {
         auto* true_out = ctx.Output<Tensor>(out_name);
-        experimental::MovesStorage(
+        experimental::MovesSharedStorage(
             std::dynamic_pointer_cast<pten::DenseTensor>(outs.at(i).impl())
                 .get(),
             true_out);
diff --git a/paddle/pten/api/lib/utils.cc b/paddle/pten/api/lib/utils.cc
index bfde9b14b0020..0032a6c62846c 100644
--- a/paddle/pten/api/lib/utils.cc
+++ b/paddle/pten/api/lib/utils.cc
@@ -20,7 +20,7 @@ limitations under the License. */
 
 #include "paddle/pten/api/lib/api_registry.h"
 #include "paddle/pten/api/lib/kernel_dispatch.h"
-#include "paddle/pten/api/lib/utils/allocator.h"
+#include "paddle/pten/api/lib/utils/storage.h"
 #include "paddle/pten/core/kernel_registry.h"
 #include "paddle/pten/include/core.h"
 #include "paddle/pten/include/infermeta.h"
@@ -62,10 +62,10 @@ PADDLE_API Tensor copy_to(const Tensor& x, Backend backend, bool blocking) {
   auto out_meta = UnchangedInferMeta(dense_x->meta());
 
   // 5. Prepare outputs
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          pten::TransToFluidPlace(backend));
-  auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
+  auto dense_out = std::make_shared<pten::DenseTensor>(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          pten::TransToFluidPlace(backend)),
+      std::move(out_meta));
   kernel_context.EmplaceBackOutput(dense_out);
   Tensor out;
   out.set_impl(dense_out);
diff --git a/paddle/pten/include/creation.h b/paddle/pten/include/creation.h
index a4f3a0464b35c..7341ea18917b8 100644
--- a/paddle/pten/include/creation.h
+++ b/paddle/pten/include/creation.h
@@ -14,7 +14,7 @@
 
 #pragma once
 
-#include "paddle/pten/api/lib/utils/allocator.h"
+#include "paddle/pten/api/lib/utils/storage.h"
 #include "paddle/pten/include/infermeta.h"
 #include "paddle/pten/kernels/cpu/creation.h"
 #include "paddle/pten/kernels/cuda/creation.h"
@@ -32,10 +32,10 @@ DenseTensor FillAnyLike(
     Backend backend = Backend::UNDEFINED,  // Is backend needed here?
     DataLayout layout = DataLayout::UNDEFINED) {
   auto out_meta = FullLikeInferMeta(x.meta(), dtype, layout);
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
   FillAnyLike<T>(dev_ctx, val, &dense_out);
   return dense_out;
 }
diff --git a/paddle/pten/include/linalg.h b/paddle/pten/include/linalg.h
index b21645ef187a8..60ec451be2cc8 100644
--- a/paddle/pten/include/linalg.h
+++ b/paddle/pten/include/linalg.h
@@ -15,7 +15,7 @@
 #pragma once
 
 // See Note: [ How do we organize the kernel directory ]
-#include "paddle/pten/api/lib/utils/allocator.h"
+#include "paddle/pten/api/lib/utils/storage.h"
 #include "paddle/pten/include/infermeta.h"
 #include "paddle/pten/kernels/cpu/linalg.h"
 #include "paddle/pten/kernels/cuda/linalg.h"
@@ -27,10 +27,10 @@ DenseTensor Dot(const ContextT& dev_ctx,
                 const DenseTensor& x,
                 const DenseTensor& y) {
   auto out_meta = DotInferMeta(x.meta(), y.meta());
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
   Dot<T>(dev_ctx, x, y, &dense_out);
   return dense_out;
 }
diff --git a/paddle/pten/include/manipulation.h b/paddle/pten/include/manipulation.h
index e694a89f700cf..e138c51e307c4 100644
--- a/paddle/pten/include/manipulation.h
+++ b/paddle/pten/include/manipulation.h
@@ -15,7 +15,7 @@
 #pragma once
 
 // See Note: [ How do we organize the kernel directory ]
-#include "paddle/pten/api/lib/utils/allocator.h"
+#include "paddle/pten/api/lib/utils/storage.h"
 #include "paddle/pten/include/infermeta.h"
 #include "paddle/pten/kernels/cpu/manipulation.h"
 #include "paddle/pten/kernels/cuda/manipulation.h"
@@ -29,10 +29,10 @@ DenseTensor Flatten(const ContextT& dev_ctx,
                     int start_axis,
                     int stop_axis) {
   auto out_meta = FlattenInferMeta(x.meta(), start_axis, stop_axis);
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
   Flatten<T>(dev_ctx, x, start_axis, stop_axis, &dense_out);
   return dense_out;
 }
@@ -43,10 +43,10 @@ DenseTensor Cast(const ContextT& dev_ctx,
                  DataType out_dtype,
                  DataType in_dtype) {
   auto out_meta = CastInferMeta(x.meta(), out_dtype);
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
   Cast<T>(dev_ctx, x, out_dtype, in_dtype, &dense_out);
   return dense_out;
 }
@@ -56,10 +56,10 @@ DenseTensor Reshape(const ContextT& dev_ctx,
                     const DenseTensor& x,
                     const std::vector<int64_t>& shape) {
   auto out_meta = InferMetaFromVecValue(x.meta(), shape);
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
   Reshape(dev_ctx, x, ScalarArray(shape), &dense_out);
   return dense_out;
 }
diff --git a/paddle/pten/include/math.h b/paddle/pten/include/math.h
index c2b9f75bda044..1afad968ef577 100644
--- a/paddle/pten/include/math.h
+++ b/paddle/pten/include/math.h
@@ -15,7 +15,7 @@ limitations under the License. */
 #pragma once
 
 // See Note: [ How do we organize the kernel directory ]
-#include "paddle/pten/api/lib/utils/allocator.h"
+#include "paddle/pten/api/lib/utils/storage.h"
 #include "paddle/pten/include/infermeta.h"
 #include "paddle/pten/kernels/cpu/math.h"
 #include "paddle/pten/kernels/cuda/math.h"
@@ -25,10 +25,10 @@ namespace pten {
 template <typename T, typename ContextT>
 DenseTensor Sign(const ContextT& dev_ctx, const DenseTensor& x) {
   auto out_meta = UnchangedInferMeta(x.meta());
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
   Sign<T>(dev_ctx, x, &dense_out);
   return dense_out;
 }
@@ -39,10 +39,10 @@ DenseTensor Mean(const ContextT& dev_ctx,
                  const std::vector<int64_t>& axis,
                  bool keep_dim) {
   auto out_meta = ReduceInferMeta(x.meta(), axis, keep_dim);
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
   bool reduce_all = false;
   DataType out_dtype = pten::DataType::UNDEFINED;
   Mean<T>(
@@ -57,10 +57,10 @@ DenseTensor Sum(const ContextT& dev_ctx,
                 DataType dtype,
                 bool keep_dim) {
   auto out_meta = ReduceInferMeta(x.meta(), axis, keep_dim);
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      out_meta);
 
   // The real value of reduce_all will be get in kernel
   // so use default value(false) is OK.
@@ -82,10 +82,10 @@ DenseTensor Scale(const ContextT& dev_ctx,
                   float bias,
                   bool bias_after_scale) {
   auto out_meta = UnchangedInferMeta(x.meta());
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
   Scale<T>(dev_ctx, x, scale, bias, bias_after_scale, &dense_out);
   return dense_out;
 }
@@ -96,10 +96,10 @@ DenseTensor Add(const ContextT& dev_ctx,
                 const DenseTensor& y,
                 int axis) {
   auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis);
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
   ElementwiseAdd<T>(dev_ctx, x, y, axis, &dense_out);
   return dense_out;
 }
@@ -110,10 +110,10 @@ DenseTensor Subtract(const ContextT& dev_ctx,
                      const DenseTensor& y,
                      int axis) {
   auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis);
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
   ElementwiseSub<T>(dev_ctx, x, y, axis, &dense_out);
   return dense_out;
 }
@@ -124,10 +124,10 @@ DenseTensor Divide(const ContextT& dev_ctx,
                    const DenseTensor& y,
                    int axis) {
   auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis);
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
   ElementwiseDiv<T>(dev_ctx, x, y, axis, &dense_out);
   return dense_out;
 }
@@ -138,10 +138,10 @@ DenseTensor Multiply(const ContextT& dev_ctx,
                      const DenseTensor& y,
                      int axis) {
   auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis);
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
   ElementwiseMul<T>(dev_ctx, x, y, axis, &dense_out);
   return dense_out;
 }
diff --git a/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h b/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h
index 16fc70b9ab7a1..e7aecf3b27aaf 100644
--- a/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h
+++ b/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h
@@ -804,10 +804,9 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x,
   // temp_output should be stored temp_data in output_data space or stored in
   // y_data;
   pten::DDim tmp_ddim;
-  const auto alloc =
-      std::make_shared<paddle::experimental::DefaultAllocator>(y->place());
   pten::DenseTensor tmp = pten::DenseTensor(
-      alloc, pten::DenseTensorMeta(y->dtype(), tmp_ddim, y->layout()));
+      pten::make_intrusive<paddle::experimental::SharedStorage>(y->place()),
+      pten::DenseTensorMeta(y->dtype(), tmp_ddim, y->layout()));
 
   auto x_data = x.data<Tx>();
   auto y_data = y->mutable_data<Ty>();
@@ -847,10 +846,8 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x,
                               reducer.initial(),
                               stream);
     // framework::Tensor tmp;
-    const auto alloc =
-        std::make_shared<paddle::experimental::DefaultAllocator>(x.place());
     pten::DenseTensor tmp = pten::DenseTensor(
-        alloc,
+        pten::make_intrusive<paddle::experimental::SharedStorage>(x.place()),
         pten::DenseTensorMeta(pten::DataType::UINT8,
                               paddle::framework::make_ddim(
                                   {static_cast<int64_t>(temp_storage_bytes)}),
diff --git a/paddle/pten/kernels/hybird/eigen/reduce.h b/paddle/pten/kernels/hybird/eigen/reduce.h
index e6ab872928c77..d60a416dfdb37 100644
--- a/paddle/pten/kernels/hybird/eigen/reduce.h
+++ b/paddle/pten/kernels/hybird/eigen/reduce.h
@@ -14,7 +14,7 @@
 
 #pragma once
 
-#include "paddle/pten/api/lib/utils/allocator.h"
+#include "paddle/pten/api/lib/utils/storage.h"
 #include "paddle/pten/core/dense_tensor.h"
 #include "paddle/pten/kernels/hybird/eigen/common.h"
 #include "paddle/pten/kernels/hybird/transpose.h"
@@ -129,9 +129,9 @@ void HandleLargeDim(const DeviceContext& dev_ctx,
                     const std::vector<int64_t>& dims,
                     bool keep_dim) {
   //  shuffle the reduced dim to the end
-  const auto alloc =
-      std::make_shared<paddle::experimental::DefaultAllocator>(input.place());
-  pten::DenseTensor shuffled_input = pten::DenseTensor(alloc, input.meta());
+  pten::DenseTensor shuffled_input = pten::DenseTensor(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(input.place()),
+      input.meta());
 
   GetShuffledInput<DeviceContext, OutT>(dev_ctx, input, &shuffled_input, dims);
 
diff --git a/paddle/pten/kernels/hybird/general/reduce_impl.h b/paddle/pten/kernels/hybird/general/reduce_impl.h
index 50f40c5f2ca12..52bdf18ad5a31 100644
--- a/paddle/pten/kernels/hybird/general/reduce_impl.h
+++ b/paddle/pten/kernels/hybird/general/reduce_impl.h
@@ -53,10 +53,9 @@ void Reduce(const DeviceContext& dev_ctx,
               dev_ctx, x, out, dims, keep_dim, reduce_all);
         }));
   } else {
-    const auto alloc =
-        std::make_shared<paddle::experimental::DefaultAllocator>(x.place());
     pten::DenseTensor tmp_tensor = pten::DenseTensor(
-        alloc, pten::DenseTensorMeta(out_dtype, x.dims(), x.layout()));
+        pten::make_intrusive<paddle::experimental::SharedStorage>(x.place()),
+        pten::DenseTensorMeta(out_dtype, x.dims(), x.layout()));
 
     // cast x tensor to out_dtype first
     PD_VISIT_ALL_TYPES(out_dtype, "CastKernelImpl", ([&] {
diff --git a/paddle/pten/tests/api/scale_api.h b/paddle/pten/tests/api/scale_api.h
index 565bb0f139d9d..5668cbe29439c 100644
--- a/paddle/pten/tests/api/scale_api.h
+++ b/paddle/pten/tests/api/scale_api.h
@@ -71,11 +71,10 @@ PADDLE_API Tensor scale_kernel_context(const Tensor& x,
   kernel_context.EmplaceBackAttr(bias_after_scale);
 
   auto out_meta = pten::UnchangedInferMeta(dense_x->meta());
-
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          pten::TransToFluidPlace(kernel_backend));
-  auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
+  auto dense_out = std::make_shared<pten::DenseTensor>(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          pten::TransToFluidPlace(kernel_backend)),
+      std::move(out_meta));
   kernel_context.EmplaceBackOutput(dense_out);
 
   Tensor out;
@@ -238,10 +237,10 @@ Tensor scale_switch_case(const Tensor& x,
   auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
 
   auto out_meta = pten::UnchangedInferMeta(dense_x->meta());
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          pten::TransToFluidPlace(kernel_backend));
-  auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
+  auto dense_out = std::make_shared<pten::DenseTensor>(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          pten::TransToFluidPlace(kernel_backend)),
+      std::move(out_meta));
 
   Tensor out;
   out.set_impl(dense_out);
diff --git a/python/paddle/utils/code_gen/api_gen.py b/python/paddle/utils/code_gen/api_gen.py
index c7e04301ca592..029985475011e 100644
--- a/python/paddle/utils/code_gen/api_gen.py
+++ b/python/paddle/utils/code_gen/api_gen.py
@@ -303,10 +303,10 @@ def gene_api_code(self):
   auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
 {input_tensors}
 {self.gene_infer_meta(self.args['inputs']['names'], self.args['attrs']['names'], self.infer_meta)}
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          pten::TransToFluidPlace(kernel_backend));
-  auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
+  auto dense_out = std::make_shared<pten::DenseTensor>(
+        pten::make_intrusive<paddle::experimental::SharedStorage>(
+            pten::TransToFluidPlace(kernel_backend)),
+        std::move(out_meta));
 
   Tensor out;
   out.set_impl(dense_out);
@@ -345,7 +345,7 @@ def source_include(header_file_path):
 #include "paddle/pten/api/lib/api_registry.h"
 #include "paddle/pten/api/lib/kernel_declare.h"
 #include "paddle/pten/api/lib/kernel_dispatch.h"
-#include "paddle/pten/api/lib/utils/allocator.h"
+#include "paddle/pten/api/lib/utils/storage.h"
 #include "paddle/pten/core/kernel_registry.h"
 #include "paddle/pten/include/core.h"
 #include "paddle/pten/include/infermeta.h"