From 5240ac0c5aa9c5118584301f0a6d992c3d319170 Mon Sep 17 00:00:00 2001
From: Chen Weihang
Date: Tue, 26 Oct 2021 08:46:16 +0800
Subject: [PATCH] Dev/op2func refactor 3 (#30)

* add a candidate dense tensor class, test=develop
* remove TensorBase::backend(), test=develop
* remove some ops, test=develop
* cherry-pick the pr of tensor meta, test=develop
* moves the dense tensor and some ops, test=develop
* update the linalg operator, test=develop
* update other operators, test=develop
* fix errors, test=develop
* fix bugs, test=develop
* try to resolve the problem of windows ci, test=develop
* updates codes, test=develop
* fix the tensor_utils.cc, test=develop
* modify the dense tensor, test=develop
* fix the data type, test=develop

Co-authored-by: shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com>
---
 paddle/fluid/framework/CMakeLists.txt | 8 +-
 paddle/fluid/framework/operator.cc | 12 +-
 paddle/fluid/framework/pten_utils.cc | 142 ----------
 paddle/fluid/framework/pten_utils.h | 30 +-
 paddle/fluid/framework/pten_utils_test.cc | 60 ----
 paddle/fluid/imperative/prepared_operator.cc | 14 +-
 paddle/fluid/operators/CMakeLists.txt | 5 +-
 paddle/fluid/operators/dot_op.h | 11 +-
 paddle/fluid/operators/fill_any_like_op.h | 6 +-
 paddle/fluid/operators/mean_op.cu | 1 +
 paddle/fluid/operators/mean_op.h | 7 +-
 paddle/fluid/operators/scale_op.h | 8 +-
 paddle/fluid/operators/sign_op.h | 6 +-
 paddle/pten/common/data_type.h | 16 +-
 paddle/pten/core/CMakeLists.txt | 10 +-
 paddle/pten/core/candidate/CMakeLists.txt | 1 -
 paddle/pten/core/candidate/dense_tensor.cc | 145 ----------
 paddle/pten/core/candidate/dense_tensor.h | 188 -------------
 paddle/pten/core/dense_tensor.cc | 190 +++++++------
 paddle/pten/core/dense_tensor.h | 256 ++++++++++--------
 paddle/pten/core/tensor_base.h | 2 -
 paddle/pten/core/tensor_meta.h | 152 ++++-------
 paddle/pten/hapi/CMakeLists.txt | 2 +-
 paddle/pten/hapi/lib/creation.cc | 9 +-
 paddle/pten/hapi/lib/linalg.cc | 6 +-
 paddle/pten/hapi/lib/manipulation.cc | 6 +-
 paddle/pten/hapi/lib/math.cc | 7 +-
 paddle/pten/hapi/lib/utils/CMakeLists.txt | 3 +-
 paddle/pten/hapi/lib/utils/tensor_utils.cc | 110 +++++++-
 paddle/pten/hapi/lib/utils/tensor_utils.h | 58 +---
 .../hapi/lib/utils/tests/test_tensor_utils.cc | 29 +-
 paddle/pten/infershape/binary.cc | 6 +-
 paddle/pten/infershape/binary.h | 14 +-
 paddle/pten/infershape/unary.cc | 18 +-
 paddle/pten/infershape/unary.h | 21 +-
 paddle/pten/kernels/cpu/CMakeLists.txt | 2 +-
 paddle/pten/kernels/cpu/manipulation.cc | 6 +-
 paddle/pten/kernels/cpu/utils.cc | 3 +-
 paddle/pten/kernels/cuda/CMakeLists.txt | 4 +-
 paddle/pten/kernels/cuda/manipulation.cu | 6 +-
 paddle/pten/kernels/cuda/math.cu | 24 +-
 paddle/pten/kernels/cuda/utils.cu | 3 +-
 paddle/pten/kernels/functions/eigen/dot.h | 1 -
 paddle/pten/kernels/functions/eigen/mean.h | 2 -
 paddle/pten/tests/CMakeLists.txt | 10 +-
 paddle/pten/tests/dense_tensor_test.cc | 13 -
 paddle/pten/tests/test_copy_api.cc | 21 +-
 paddle/pten/tests/test_dot_api.cc | 21 +-
 paddle/pten/tests/test_fill_api.cc | 39 +--
 paddle/pten/tests/test_flatten_api.cc | 12 +-
 paddle/pten/tests/test_mean_api.cc | 12 +-
 51 files changed, 632 insertions(+), 1106 deletions(-)
 delete mode 100644 paddle/pten/core/candidate/CMakeLists.txt
 delete mode 100644 paddle/pten/core/candidate/dense_tensor.cc
 delete mode 100644 paddle/pten/core/candidate/dense_tensor.h

diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index 231105628dd7c..889925c6fdd39 100644
--- a/paddle/fluid/framework/CMakeLists.txt
+++ 
b/paddle/fluid/framework/CMakeLists.txt @@ -195,10 +195,12 @@ cc_library(unused_var_check SRCS unused_var_check.cc DEPS glog no_need_buffer_va IF(WITH_XPU) cc_library(operator SRCS operator.cc DEPS xpu_op_list op_info device_context tensor scope glog trainer_desc_proto data_feed_proto - shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils pten pten_utils) + shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils + pten pten_utils kernel_factory) ELSE() cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog trainer_desc_proto data_feed_proto - shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils pten pten_utils) + shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils + pten pten_utils kernel_factory) ENDIF() cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry device_context) @@ -392,7 +394,7 @@ cc_library(save_load_util SRCS save_load_util.cc DEPS tensor scope layer) cc_test(save_load_util_test SRCS save_load_util_test.cc DEPS save_load_util tensor scope layer) cc_library(generator SRCS generator.cc DEPS enforce place) -cc_library(pten_utils SRCS pten_utils.cc DEPS lod_tensor selected_rows place pten var_type_traits) +cc_library(pten_utils SRCS pten_utils.cc DEPS lod_tensor selected_rows place pten var_type_traits pten_hapi_utils) # Get the current working branch execute_process( diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 7c63f7c76c921..f8ec13f1d8b98 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -1819,10 +1819,10 @@ pten::KernelContext OperatorWithKernel::BuildPtenKernelContext( paddle::SmallVector> tmp_inputs; for (auto var : ins_vector) { - auto pt_in = framework::InputVariableToPtenTensor(*var, in_def); - tmp_inputs.emplace_back(pt_in); + tmp_inputs.emplace_back( + experimental::MakePtenTensorBaseFromVar(*var, in_def)); } - op_kernel_ctx.EmplaceBackInputs(tmp_inputs); + op_kernel_ctx.EmplaceBackInputs(std::move(tmp_inputs)); } for (size_t i = 0; i < output_names.size(); ++i) { @@ -1831,10 +1831,10 @@ pten::KernelContext OperatorWithKernel::BuildPtenKernelContext( paddle::SmallVector> tmp_outputs; for (auto var : outs_vector) { - auto pt_out = framework::OutputVariableToPtenTensor(var, out_def); - tmp_outputs.emplace_back(pt_out); + tmp_outputs.emplace_back( + experimental::MakePtenTensorBaseFromVar(var, out_def)); } - op_kernel_ctx.EmplaceBackOutputs(tmp_outputs); + op_kernel_ctx.EmplaceBackOutputs(std::move(tmp_outputs)); } for (size_t i = 0; i < attr_names.size(); ++i) { diff --git a/paddle/fluid/framework/pten_utils.cc b/paddle/fluid/framework/pten_utils.cc index 9dac142557ed4..96408afc100e9 100644 --- a/paddle/fluid/framework/pten_utils.cc +++ b/paddle/fluid/framework/pten_utils.cc @@ -24,148 +24,6 @@ limitations under the License. 
*/ namespace paddle { namespace framework { -// TODO(chenweihang, shixiaowei): adapt SelectedRows -template <> -std::shared_ptr MakeTensorImpl( - const LoDTensor& tensor, pten::Backend backend, - paddle::experimental::DataType dtype, - paddle::experimental::DataLayout layout) { - auto holder = tensor.Holder(); - auto tensor_impl = std::make_shared( - pten::TensorMeta(tensor.dims(), backend, dtype, layout, tensor.offset()), - pten::TensorStatus()); - - if (holder != nullptr) { - tensor_impl->ShareAllocation(tensor.Holder()); - } - return tensor_impl; -} - -template <> -std::shared_ptr MakeTensorImpl( - const Tensor& tensor, pten::Backend backend, - paddle::experimental::DataType dtype, - paddle::experimental::DataLayout layout) { - auto holder = tensor.Holder(); - auto tensor_impl = std::make_shared( - pten::TensorMeta(tensor.dims(), backend, dtype, layout, tensor.offset()), - pten::TensorStatus()); - - if (holder != nullptr) { - tensor_impl->ShareAllocation(tensor.Holder()); - } - return tensor_impl; -} - -template <> -std::shared_ptr MakeTensorImpl( - const LoDTensor& tensor, const platform::Place& place, - proto::VarType::Type type) { - return MakeTensorImpl( - tensor, pten::TransToPtenBackend(place), pten::TransToPtenDataType(type), - pten::TransToPtenDataLayout(tensor.layout())); -} - -template <> -std::shared_ptr MakeTensorImpl( - const Tensor& tensor, const platform::Place& place, - proto::VarType::Type type) { - return MakeTensorImpl( - tensor, pten::TransToPtenBackend(place), pten::TransToPtenDataType(type), - pten::TransToPtenDataLayout(tensor.layout())); -} - -template <> -void ShareTensorImpl(pten::DenseTensor* tensor_impl, - LoDTensor* out) { - out->ResetHolderWithType(tensor_impl->allocation(), - pten::TransToProtoVarType(tensor_impl->data_type())); -} - -template <> -void ShareTensorImpl(pten::DenseTensor* tensor_impl, - Tensor* out) { - out->ResetHolderWithType(tensor_impl->allocation(), - pten::TransToProtoVarType(tensor_impl->data_type())); -} - -std::shared_ptr InputVariableToPtenTensor( - const framework::Variable& variable, const pten::TensorArgDef& arg_def) { - auto expected_place = pten::TransToFluidPlace(arg_def.backend); - - if (variable.template IsType()) { - const auto& tensor = variable.template Get(); - if (!platform::is_same_place(tensor.place(), expected_place)) { - framework::LoDTensor tmp_tensor; - framework::TensorCopySync(tensor, expected_place, &tmp_tensor); - auto pt_in = - framework::MakeTensorImpl( - tmp_tensor, arg_def.backend, arg_def.dtype, arg_def.layout); - return pt_in; - } else { - auto pt_in = - framework::MakeTensorImpl( - tensor, arg_def.backend, arg_def.dtype, arg_def.layout); - return pt_in; - } - } else if (variable.template IsType()) { - // TODO(chenweihang): now we don't deal with row and height - // by xiaowei's advice - const auto& tensor = variable.template Get(); - if (!platform::is_same_place(tensor.value().place(), expected_place)) { - framework::Tensor tmp_tensor; - TensorCopySync(tensor.value(), expected_place, &tmp_tensor); - // TODO(chenweihang): adapt SelectedRows by xiaowei's design - auto pt_in = - framework::MakeTensorImpl( - tmp_tensor, arg_def.backend, arg_def.dtype, arg_def.layout); - return pt_in; - } else { - auto pt_in = - framework::MakeTensorImpl( - tensor.value(), arg_def.backend, arg_def.dtype, arg_def.layout); - return pt_in; - } - } else { - PADDLE_THROW(platform::errors::Unimplemented( - "Unsupported shared input `%s` type now when call pt kernel.", - framework::ToTypeName(variable.Type()))); - } - return 
nullptr; -} - -std::shared_ptr OutputVariableToPtenTensor( - framework::Variable* variable, const pten::TensorArgDef& arg_def) { - // mutable_data before run kernel, to avoid share output form - // KernelContext to original tensor - if (variable->template IsType()) { - auto* tensor = variable->template GetMutable(); - tensor->mutable_data(pten::TransToFluidPlace(arg_def.backend), - pten::TransToProtoVarType(arg_def.dtype)); - auto pt_out = - framework::MakeTensorImpl( - *tensor, arg_def.backend, arg_def.dtype, arg_def.layout); - return pt_out; - } else if (variable->template IsType()) { - auto* tensor = variable->template GetMutable(); - tensor->mutable_value()->mutable_data( - pten::TransToFluidPlace(arg_def.backend), - pten::TransToProtoVarType(arg_def.dtype)); - // TODO(chenweihang): adapt SelectedRows by xiaowei's design, - // here the row and height will lost in output! - auto pt_out = - framework::MakeTensorImpl( - tensor->value(), arg_def.backend, arg_def.dtype, arg_def.layout); - return pt_out; - } else { - PADDLE_THROW(platform::errors::Unimplemented( - "Unsupported shared output `%s` type now when call pt kernel.", - framework::ToTypeName(variable->Type()))); - } - - return nullptr; -} - OpKernelType TransPtenKernelKeyToOpKernelType( const pten::KernelKey& kernel_key) { proto::VarType::Type data_type = diff --git a/paddle/fluid/framework/pten_utils.h b/paddle/fluid/framework/pten_utils.h index 263101657ceb9..8c1c25b3b67cd 100644 --- a/paddle/fluid/framework/pten_utils.h +++ b/paddle/fluid/framework/pten_utils.h @@ -25,41 +25,13 @@ limitations under the License. */ #include "paddle/fluid/platform/macros.h" #include "paddle/fluid/platform/place.h" #include "paddle/pten/api/include/core.h" +#include "paddle/pten/hapi/lib/utils/tensor_utils.h" #include "paddle/utils/flat_hash_map.h" #include "paddle/utils/small_vector.h" namespace paddle { namespace framework { -/* tensor translate */ - -template -std::shared_ptr MakeTensorImpl( - const VariableT& tensor, pten::Backend backend, - paddle::experimental::DataType dtype, - paddle::experimental::DataLayout layout); - -template -std::shared_ptr MakeTensorImpl(const LoDTensor& tensor, - const platform::Place& place, - proto::VarType::Type type); - -template -std::shared_ptr MakeTensorImpl(const Tensor& tensor, - const platform::Place& place, - proto::VarType::Type type); - -template -void ShareTensorImpl(PtenTensorImplT* tensor_impl, LoDTensor* out); - -template -void ShareTensorImpl(PtenTensorImplT* tensor_impl, Tensor* out); - -std::shared_ptr InputVariableToPtenTensor( - const framework::Variable& variable, const pten::TensorArgDef& arg_def); -std::shared_ptr OutputVariableToPtenTensor( - framework::Variable* variable, const pten::TensorArgDef& arg_def); - /* Kernel Key translate */ OpKernelType TransPtenKernelKeyToOpKernelType( diff --git a/paddle/fluid/framework/pten_utils_test.cc b/paddle/fluid/framework/pten_utils_test.cc index 33c55a8086b4e..ab2d60a34303a 100644 --- a/paddle/fluid/framework/pten_utils_test.cc +++ b/paddle/fluid/framework/pten_utils_test.cc @@ -18,66 +18,6 @@ limitations under the License. */ #include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/framework/variable.h" -TEST(PtenUtils, FluidTensorToPtenTensor) { - // 1. create tensor - paddle::framework::LoDTensor x; - paddle::framework::Tensor x2; - x.Resize({2}); - x.mutable_data(paddle::platform::CPUPlace()); - x.data()[0] = 0.2; - x.data()[1] = 0.5; - - // 2. 
test API - auto dense_x = paddle::framework::MakeTensorImpl( - x, x.place(), x.type()); - - // 3. check result - std::vector expect_value = {0.2, 0.5}; - ASSERT_EQ(dense_x->data()[0], expect_value[0]); - ASSERT_EQ(dense_x->data()[1], expect_value[1]); - ASSERT_EQ(dense_x->backend(), pten::Backend::CPU); - ASSERT_EQ(dense_x->data_type(), pten::DataType::FLOAT32); -} - -TEST(PtenUtils, VarToPtenTensor) { - // 1. create Variable - paddle::framework::Variable v; - auto selected_rows = v.GetMutable(); - paddle::framework::Tensor* value = selected_rows->mutable_value(); - auto* data = value->mutable_data(paddle::framework::make_ddim({1, 1}), - paddle::platform::CPUPlace()); - data[0] = 123; - pten::Backend expect_backend = pten::Backend::CPU; - -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - expect_backend = pten::Backend::CUDA; -#endif - auto tensor_def = pten::TensorArgDef(expect_backend, pten::DataLayout::NCHW, - pten::DataType::INT32); - // 2. test API - auto tensor_x = paddle::framework::InputVariableToPtenTensor(v, tensor_def); - // 3. check result - ASSERT_EQ(tensor_x->backend(), expect_backend); - ASSERT_EQ(tensor_x->data_type(), pten::DataType::INT32); -} - -TEST(PtenUtils, PtenTensorToFluidTensor) { - pten::DenseTensor dense_tensor( - pten::TensorMeta(paddle::framework::make_ddim({1, 1}), pten::Backend::CPU, - pten::DataType::FLOAT32, pten::DataLayout::ANY), - pten::TensorStatus()); - auto* data_ptr = dense_tensor.mutable_data(); - data_ptr[0] = 0.5; - // share allocation into fluid Tensor - paddle::framework::Tensor tensor; - paddle::framework::LoDTensor lod_tensor; - paddle::framework::ShareTensorImpl(&dense_tensor, &tensor); - paddle::framework::ShareTensorImpl(&dense_tensor, &lod_tensor); - // compare - ASSERT_EQ(tensor.data()[0], 0.5); - ASSERT_EQ(lod_tensor.data()[0], 0.5); -} - TEST(PtenUtils, TransPtenKernelKeyToOpKernelType) { pten::KernelKey kernel_key(pten::Backend::CPU, pten::DataLayout::NCHW, pten::DataType::FLOAT32); diff --git a/paddle/fluid/imperative/prepared_operator.cc b/paddle/fluid/imperative/prepared_operator.cc index 2ffb47273f650..f2251e34fb029 100644 --- a/paddle/fluid/imperative/prepared_operator.cc +++ b/paddle/fluid/imperative/prepared_operator.cc @@ -293,11 +293,10 @@ static pten::KernelContext BuildDygraphPtenKernelContext( paddle::SmallVector> tmp_inputs; for (auto var : ins_vector) { const auto& variable = var->Var(); - - auto pt_in = framework::InputVariableToPtenTensor(variable, in_def); - tmp_inputs.emplace_back(pt_in); + tmp_inputs.emplace_back( + experimental::MakePtenTensorBaseFromVar(variable, in_def)); } - op_kernel_ctx.EmplaceBackInputs(tmp_inputs); + op_kernel_ctx.EmplaceBackInputs(std::move(tmp_inputs)); } for (size_t i = 0; i < output_names.size(); ++i) { @@ -307,11 +306,10 @@ static pten::KernelContext BuildDygraphPtenKernelContext( paddle::SmallVector> tmp_outputs; for (auto var : outs_vector) { auto* variable = var->MutableVar(); - - auto pt_out = framework::OutputVariableToPtenTensor(variable, out_def); - tmp_outputs.emplace_back(pt_out); + tmp_outputs.emplace_back( + experimental::MakePtenTensorBaseFromVar(variable, out_def)); } - op_kernel_ctx.EmplaceBackOutputs(tmp_outputs); + op_kernel_ctx.EmplaceBackOutputs(std::move(tmp_outputs)); } for (size_t i = 0; i < attr_names.size(); ++i) { diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index adbd9bf277b11..bafc650c433db 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -80,8 +80,9 @@ 
if(WITH_UNITY_BUILD) endif() set(OP_HEADER_DEPS ${OP_HEADER_DEPS} pten) -set(OP_HEADER_DEPS ${OP_HEADER_DEPS} pten_utils) -register_operators(EXCLUDES py_layer_op py_func_op warpctc_op dgc_op load_combine_op lstm_op run_program_op eye_op +#set(OP_HEADER_DEPS ${OP_HEADER_DEPS} pten_utils) +register_operators(EXCLUDES +py_layer_op py_func_op warpctc_op dgc_op load_combine_op lstm_op run_program_op eye_op recurrent_op save_combine_op sparse_attention_op sync_batch_norm_op spectral_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS}) op_library(run_program_op SRCS run_program_op.cc run_program_op.cu.cc DEPS executor_cache ${OP_HEADER_DEPS}) diff --git a/paddle/fluid/operators/dot_op.h b/paddle/fluid/operators/dot_op.h index 641b0d653d5b0..6a025fdd9ccc6 100644 --- a/paddle/fluid/operators/dot_op.h +++ b/paddle/fluid/operators/dot_op.h @@ -16,13 +16,13 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/pten_utils.h" #include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/platform/for_range.h" // only can include the headers in paddle/pten/api dirs #include "paddle/pten/api/include/core.h" #include "paddle/pten/api/include/linalg.h" +#include "paddle/pten/hapi/lib/utils/tensor_utils.h" namespace paddle { namespace operators { @@ -244,12 +244,9 @@ class DotKernel : public framework::OpKernel { auto& dev_ctx = ctx.device_context(); out->mutable_data(x->place()); - auto pt_x = - framework::MakeTensorImpl(*x, x->place(), x->type()); - auto pt_y = - framework::MakeTensorImpl(*y, y->place(), y->type()); - auto pt_out = framework::MakeTensorImpl(*out, x->place(), - x->type()); + auto pt_x = paddle::experimental::MakePtenDenseTensor(*x); + auto pt_y = paddle::experimental::MakePtenDenseTensor(*y); + auto pt_out = paddle::experimental::MakePtenDenseTensor(*out); // call new kernel pten::Dot(dev_ctx, *pt_x.get(), *pt_y.get(), pt_out.get()); diff --git a/paddle/fluid/operators/fill_any_like_op.h b/paddle/fluid/operators/fill_any_like_op.h index 73170c6e2e277..fc649f42c51a1 100644 --- a/paddle/fluid/operators/fill_any_like_op.h +++ b/paddle/fluid/operators/fill_any_like_op.h @@ -62,10 +62,8 @@ class FillAnyLikeKernel : public framework::OpKernel { std::isnan(value), false, platform::errors::InvalidArgument("The filled value is NaN.")); - auto pt_x = framework::MakeTensorImpl(*in, in->place(), - in->type()); - auto pt_out = framework::MakeTensorImpl( - *out, out->place(), out->type()); + auto pt_x = paddle::experimental::MakePtenDenseTensor(*in); + auto pt_out = paddle::experimental::MakePtenDenseTensor(*out); const auto& dev_ctx = context.template device_context(); // call new kernel diff --git a/paddle/fluid/operators/mean_op.cu b/paddle/fluid/operators/mean_op.cu index ffb667ba974b8..26c844392d4d7 100644 --- a/paddle/fluid/operators/mean_op.cu +++ b/paddle/fluid/operators/mean_op.cu @@ -62,6 +62,7 @@ class MeanCUDAGradKernel : public framework::OpKernel { namespace ops = paddle::operators; namespace plat = paddle::platform; + REGISTER_OP_CUDA_KERNEL( mean, ops::MeanKernel, ops::MeanKernel, diff --git a/paddle/fluid/operators/mean_op.h b/paddle/fluid/operators/mean_op.h index 9a8c2736589c9..9d9954a8412a3 100644 --- a/paddle/fluid/operators/mean_op.h +++ b/paddle/fluid/operators/mean_op.h @@ -20,6 +20,7 @@ limitations under the License. 
*/ // only can include the headers in paddle/top/api dirs #include "paddle/pten/api/include/core.h" #include "paddle/pten/api/include/math.h" +#include "paddle/pten/hapi/lib/utils/tensor_utils.h" namespace paddle { namespace operators { @@ -61,10 +62,8 @@ class MeanKernel : public framework::OpKernel { auto& dev_ctx = context.device_context(); out->mutable_data(x->place()); - auto pt_x = - framework::MakeTensorImpl(*x, x->place(), x->type()); - auto pt_out = framework::MakeTensorImpl(*out, x->place(), - x->type()); + auto pt_x = paddle::experimental::MakePtenDenseTensor(*x); + auto pt_out = paddle::experimental::MakePtenDenseTensor(*out); // call new kernel VLOG(1) << "chenweihang: call original mean kernel compute."; diff --git a/paddle/fluid/operators/scale_op.h b/paddle/fluid/operators/scale_op.h index 9a043361678b2..0d7113a6f4de9 100644 --- a/paddle/fluid/operators/scale_op.h +++ b/paddle/fluid/operators/scale_op.h @@ -20,6 +20,7 @@ limitations under the License. */ // only can include the headers in paddle/top/api dirs #include "paddle/pten/api/include/core.h" #include "paddle/pten/api/include/math.h" +#include "paddle/pten/hapi/lib/utils/tensor_utils.h" namespace paddle { namespace operators { @@ -60,16 +61,13 @@ class ScaleKernel : public framework::OpKernel { out_slr->set_rows(in_slr.rows()); out_slr->set_height(in_slr.height()); } - auto* out = framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(out_var); out->mutable_data(in->place()); auto& dev_ctx = ctx.device_context(); - auto pt_x = framework::MakeTensorImpl(*in, in->place(), - in->type()); - auto pt_out = framework::MakeTensorImpl( - *out, in->place(), in->type()); + auto pt_x = paddle::experimental::MakePtenDenseTensor(*in); + auto pt_out = paddle::experimental::MakePtenDenseTensor(*out); // call new kernel pten::Scale(dev_ctx, *pt_x.get(), scale, bias, bias_after_scale, diff --git a/paddle/fluid/operators/sign_op.h b/paddle/fluid/operators/sign_op.h index f3083f4937875..0e3036115e3c1 100644 --- a/paddle/fluid/operators/sign_op.h +++ b/paddle/fluid/operators/sign_op.h @@ -36,10 +36,8 @@ class SignKernel : public framework::OpKernel { auto& dev_ctx = context.device_context(); out->mutable_data(x->place()); - auto pt_x = - framework::MakeTensorImpl(*x, x->place(), x->type()); - auto pt_out = framework::MakeTensorImpl(*out, x->place(), - x->type()); + auto pt_x = paddle::experimental::MakePtenDenseTensor(*x); + auto pt_out = paddle::experimental::MakePtenDenseTensor(*out); // call new kernel pten::Sign(dev_ctx, *pt_x.get(), pt_out.get()); diff --git a/paddle/pten/common/data_type.h b/paddle/pten/common/data_type.h index f5383da31cf93..27ca28b273485 100644 --- a/paddle/pten/common/data_type.h +++ b/paddle/pten/common/data_type.h @@ -54,6 +54,7 @@ inline size_t SizeOf(DataType data_type) { case DataType::UINT8: case DataType::INT8: return 1; + case DataType::BFLOAT16: case DataType::FLOAT16: case DataType::INT16: case DataType::UINT16: @@ -65,11 +66,11 @@ inline size_t SizeOf(DataType data_type) { case DataType::FLOAT64: case DataType::INT64: case DataType::UINT64: - return 8; - case DataType::UNDEFINED: - case DataType::BFLOAT16: case DataType::COMPLEX64: + return 8; case DataType::COMPLEX128: + return 16; + case DataType::UNDEFINED: case DataType::NUM_DATA_TYPES: PADDLE_THROW(platform::errors::Unimplemented( "Data type %d is not supported by tensor.", @@ -138,12 +139,21 @@ inline std::ostream& operator<<(std::ostream& os, DataType dtype) { case DataType::INT16: os << "int16"; break; + case DataType::UINT16: + os << 
"uint16"; + break; case DataType::INT32: os << "int32"; break; + case DataType::UINT32: + os << "uint32"; + break; case DataType::INT64: os << "int64"; break; + case DataType::UINT64: + os << "uint64"; + break; case DataType::BFLOAT16: os << "bfloat16"; break; diff --git a/paddle/pten/core/CMakeLists.txt b/paddle/pten/core/CMakeLists.txt index ca562332bb79f..a7ccf31467438 100644 --- a/paddle/pten/core/CMakeLists.txt +++ b/paddle/pten/core/CMakeLists.txt @@ -1,5 +1,3 @@ -add_subdirectory(candidate) - IF(WITH_MKLDNN) set(MKLDNN_CTX_DEPS mkldnn) ELSE() @@ -7,15 +5,15 @@ ELSE() ENDIF() if(WITH_GPU) - cc_library(convert_utils SRCS convert_utils.cc DEPS data_type place gpu_info) + cc_library(convert_utils SRCS convert_utils.cc DEPS data_type place gpu_info) elseif(WITH_ROCM) - cc_library(convert_utils SRCS convert_utils.cc DEPS data_type place gpu_info) + cc_library(convert_utils SRCS convert_utils.cc DEPS data_type place gpu_info) else() - cc_library(convert_utils SRCS convert_utils.cc DEPS data_type place) + cc_library(convert_utils SRCS convert_utils.cc DEPS data_type place) endif() -cc_library(dense_tensor SRCS dense_tensor.cc DEPS enforce data_type ddim allocator place convert_utils ${MKLDNN_CTX_DEPS}) cc_library(kernel_factory SRCS kernel_factory.cc DEPS enforce) cc_library(kernel_context SRCS kernel_context.cc DEPS enforce device_context) cc_library(tensor_base SRCS tensor_base.cc allocator.cc storage.cc DEPS enforce) +cc_library(dense_tensor SRCS dense_tensor.cc DEPS tensor_base) diff --git a/paddle/pten/core/candidate/CMakeLists.txt b/paddle/pten/core/candidate/CMakeLists.txt deleted file mode 100644 index dd670abdba1c1..0000000000000 --- a/paddle/pten/core/candidate/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -cc_library(pten_dense_tensor SRCS dense_tensor.cc DEPS tensor_base) diff --git a/paddle/pten/core/candidate/dense_tensor.cc b/paddle/pten/core/candidate/dense_tensor.cc deleted file mode 100644 index 325edd1ba077f..0000000000000 --- a/paddle/pten/core/candidate/dense_tensor.cc +++ /dev/null @@ -1,145 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/pten/core/candidate/dense_tensor.h" - -namespace pten { -namespace candidate { - -DenseTensorMeta::DenseTensorMeta(DataType type, const DDim& dims) - : dims(dims), type(type) {} -DenseTensorMeta::DenseTensorMeta(DataType type, - const DDim& dims, - DataLayout layout) - : dims(dims), type(type), layout(layout) {} -DenseTensorMeta::DenseTensorMeta(DataType type, - const DDim& dims, - DataLayout layout, - const std::vector>& lod) - : dims(dims), type(type), layout(layout), lod(lod) {} - -bool DenseTensorMeta::valid() const noexcept { - bool valid{true}; - valid = valid && (type != DataType::UNDEFINED); - valid = valid && (layout != DataLayout::UNDEFINED); - valid = valid && (is_scalar || product(dims)); - return valid; -} - -DenseTensor::DenseTensor(const std::shared_ptr& a, - const DenseTensorMeta& meta) - : meta_(meta), - storage_( - make_intrusive(a, SizeOf(data_type()) * numel())) {} - -DenseTensor::DenseTensor(const std::shared_ptr& a, - DenseTensorMeta&& meta) - : meta_(std::move(meta)), - storage_( - make_intrusive(a, SizeOf(data_type()) * numel())) {} - -DenseTensor::DenseTensor(intrusive_ptr storage, - const DenseTensorMeta& meta) - : meta_(meta), storage_(std::move(storage)) {} - -DenseTensor::DenseTensor(intrusive_ptr storage, DenseTensorMeta&& meta) - : meta_(std::move(meta)), storage_(std::move(storage)) {} - -int64_t DenseTensor::numel() const { - if (meta_.is_scalar) { - return 1; - } - return product(meta_.dims); -} - -bool DenseTensor::SharesStorageWith(const DenseTensor& b) const { - return storage_.get() == b.storage_.get() && storage_.get() != nullptr; -} - -template -T* DenseTensor::mutable_data(size_t request_bytes) { - PADDLE_ENFORCE( - valid(), - paddle::platform::errors::PreconditionNotMet( - "The meta data must be valid when call the mutable data function.")); - PADDLE_ENFORCE_NOT_NULL( - storage_, - paddle::platform::errors::PreconditionNotMet( - "The storage must be valid when call the mutable data function.")); - PADDLE_ENFORCE( - (data_type() == paddle::experimental::CppTypeToDataType::Type()), - paddle::platform::errors::PreconditionNotMet( - "The type of data we are trying to retrieve does not match the " - "type of data currently contained in the container.")); - size_t bytes = numel() * SizeOf(data_type()); - if (request_bytes) { - PADDLE_ENFORCE_GE(request_bytes, - bytes, - paddle::platform::errors::InvalidArgument( - "The reserved size %d should be enough to meet the " - "volume required by metadata %d.", - request_bytes, - bytes)); - bytes = request_bytes; - } - if (storage_->size() < bytes) { - storage_->Realloc(bytes); - } - return static_cast(storage_->data()); -} - -template -const T* DenseTensor::data() const { - PADDLE_ENFORCE_NOT_NULL( - storage_, - paddle::platform::errors::PreconditionNotMet( - "The storage must be valid when call the mutable data function.")); - PADDLE_ENFORCE( - (data_type() == paddle::experimental::CppTypeToDataType::Type()), - paddle::platform::errors::PreconditionNotMet( - "The type of data we are trying to retrieve does not match the " - "type of data currently contained in the container.")); - return static_cast(storage_->data()); -} - -void DenseTensor::check_memory_size() const { - size_t bytes = numel() * SizeOf(data_type()); - PADDLE_ENFORCE_GE(memory_size(), - bytes, - paddle::platform::errors::InvalidArgument( - "The memory size %d should be enough to meet the " - "volume required by metadata %d.", - memory_size(), - bytes)); -} - -#define DATA_MEMBER_FUNC_INSTANTIATION(dtype) \ - template 
dtype* DenseTensor::mutable_data(size_t request_bytes); \ - template const dtype* DenseTensor::data() const; - -DATA_MEMBER_FUNC_INSTANTIATION(int8_t); -DATA_MEMBER_FUNC_INSTANTIATION(uint8_t); -DATA_MEMBER_FUNC_INSTANTIATION(int16_t); -DATA_MEMBER_FUNC_INSTANTIATION(uint16_t); -DATA_MEMBER_FUNC_INSTANTIATION(int32_t); -DATA_MEMBER_FUNC_INSTANTIATION(uint32_t); -DATA_MEMBER_FUNC_INSTANTIATION(int64_t); -DATA_MEMBER_FUNC_INSTANTIATION(uint64_t); -DATA_MEMBER_FUNC_INSTANTIATION(float); -DATA_MEMBER_FUNC_INSTANTIATION(double); - -#undef DATA_MEMBER_FUNC_INSTANTIATION - -} // namespace candidate -} // namespace pten diff --git a/paddle/pten/core/candidate/dense_tensor.h b/paddle/pten/core/candidate/dense_tensor.h deleted file mode 100644 index 21a093439529f..0000000000000 --- a/paddle/pten/core/candidate/dense_tensor.h +++ /dev/null @@ -1,188 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "paddle/fluid/framework/ddim.h" -#include "paddle/fluid/platform/place.h" -#include "paddle/pten/common/data_type.h" -#include "paddle/pten/core/allocator.h" -#include "paddle/pten/core/storage.h" -#include "paddle/pten/core/tensor_base.h" - -namespace pten { -namespace candidate { - -using DDim = paddle::framework::DDim; - -/// \brief The meta data of dense tensor. Take the structure type -/// and use all default operations. -/// -struct DenseTensorMeta { - using DataType = paddle::experimental::DataType; - using DataLayout = paddle::experimental::DataLayout; - - DenseTensorMeta() = default; - DenseTensorMeta(DataType type, const DDim& dims); - DenseTensorMeta(DataType type, const DDim& dims, DataLayout layout); - DenseTensorMeta(DataType type, - const DDim& dims, - DataLayout layout, - const std::vector>& lod); - - /// \brief Test whether the metadata is valid. Does not throw exceptions. - /// \return Whether the metadata is valid. - bool valid() const noexcept; - - /// During the entire life cycle of a DenseTensor, the following attributes - /// marked with `const` are expected to remain unchanged. - const bool is_scalar{false}; - DDim dims; - const DataType type{DataType::FLOAT32}; - const DataLayout layout{DataLayout::NCHW}; - std::vector> lod; -}; - -/// \brief The Dense tensor store values in a contiguous sequential block -/// of memory where all values are represented. Tensors or multi-dimensional -/// arrays are used in math operators. -/// During the entire life cycle of a DenseTensor, its device type and key -/// metadata are set unchanged. -class DenseTensor : public TensorBase, - public TypeInfoTraits { - public: - /// \brief Construct a dense tensor and allocate space. - /// \param a The allocator used to allocate space. - /// \param meta The meta data of dense tensor. - DenseTensor(const std::shared_ptr& a, const DenseTensorMeta& meta); - - /// \brief Construct a dense tensor and allocate space. - /// \param a The allocator used to allocate space. - /// \param meta The meta data of dense tensor. 
- DenseTensor(const std::shared_ptr& a, DenseTensorMeta&& meta); - - /// \brief Use existing storage space to create dense tensor. This interface - /// can be used to deliberately create an uninitialized dense tensor. - /// \param storage The existing storage. - /// \param meta The meta data of dense tensor. - DenseTensor(intrusive_ptr storage, const DenseTensorMeta& meta); - - /// \brief Use existing storage space to create dense tensor. This interface - /// can be used to deliberately create an uninitialized dense tensor. - /// \param storage The existing storage. - /// \param meta The meta data of dense tensor. - DenseTensor(intrusive_ptr storage, DenseTensorMeta&& meta); - - /// \brief Because dense tensor is a kind of container, we give a default - /// constructor to use for stl container. But the dense tensor created with - /// the default constructor is not practical. - DenseTensor() = default; - - /// \brief Because dense tensor is a resource handle, we provide a default - /// move constructor to support move semantics. - DenseTensor(DenseTensor&& other) = default; - - /// \brief We do not recommend deep copy of dense tensor because of its - /// efficiency and complexity across devices. The operation is disabled here. - DenseTensor(const DenseTensor& other) = delete; - - /// \brief Destroy the tensor object and release exclusive resources. - virtual ~DenseTensor() = default; - - public: - /// \brief Returns the name of the class for type traits. - /// \return The name of the class. - static const char* name() { return "DenseTensor"; } - - /// \brief Returns the number of elements contained in tensor. - /// \return The number of elements contained in tensor. - int64_t numel() const; - - /// \brief Returns the dims of the tensor. - /// \return The dims of the tensor. - const DDim& dims() const noexcept { return meta_.dims; } - - /// \brief Returns the lod of the tensor. - /// \return The lod of the tensor. - const std::vector>& lod() const noexcept { - return meta_.lod; - } - - /// \brief Returns the data type of the tensor. - /// \return The data type of the tensor. - DataType data_type() const noexcept { return meta_.type; } - - /// \brief Returns the data layout of the tensor. - /// \return The data layout of the tensor. - DataLayout layout() const noexcept { return meta_.layout; } - - /// \brief Returns the data place of the tensor. - /// \return The data place of the tensor. - const Place& place() const { return storage_->place(); } - - /// \brief Test whether the metadata is valid. - /// \return Whether the metadata is valid. - bool valid() const noexcept { return meta_.valid(); } - - /// \brief Test whether the storage is allocated. - /// return Whether the storage is allocated. - bool initialized() const { return storage_->data(); } - - /// \brief Check if storage is shared with other objects. - /// \return Whether the storage is shared with other objects. - bool SharesStorageWith(const DenseTensor& b) const; - - /// \brief Change the dims information in the metadata, and the corresponding - /// memory allocation will occur when the `mutable_data` is called. - /// \param dims The new dims of the dense tensor. - void Resize(const DDim& dims) noexcept { meta_.dims = dims; } - - /// \brief Returns the actual storage size occupied by tensor, may be larger - /// than its shape dims. - /// \return The actual storage size occupied by tensor. 
- size_t memory_size() const { return storage_->size(); } - - /// \brief Check that the storage area is large enough to hold the data of the - /// metadata size, and throw an exception if the conditions are not met. - void check_memory_size() const; - - /// \brief Release the storage area for other purposes. Because of the - /// destruction of encapsulation, we do not support two dense tensors directly - /// sharing the same intrusive pointer. - /// \return The rvalue of instrusize pointer releated to the released storage. - intrusive_ptr release() { return std::move(storage_); } - - /// \brief Get the mutable data pointer value of type T. - /// Memory allocation may occur when calling this interface: - /// 1. When the storage size is not enough to meet the current shape of the - /// data. - /// 2. When more request_bytes parameters are used to reserve the data - /// storage. - /// param request_bytes The bytes to reserve the data storage. - /// \return The mutable data pointer value of type T. - template - T* mutable_data(size_t request_bytes = 0); - - /// \brief Get the const data pointer value of type T. - /// \return The const data pointer value of type T. - template - const T* data() const; - - private: - DenseTensorMeta meta_; - intrusive_ptr storage_; -}; - -} // namespace candidate -} // namespace pten diff --git a/paddle/pten/core/dense_tensor.cc b/paddle/pten/core/dense_tensor.cc index 0a11c8e7d1912..647ddea0b4e1b 100644 --- a/paddle/pten/core/dense_tensor.cc +++ b/paddle/pten/core/dense_tensor.cc @@ -13,114 +13,126 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/pten/core/dense_tensor.h" -#include "paddle/pten/core/convert_utils.h" - -// See Note [ Why still include the fluid headers? ] -#include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/memory/malloc.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/gpu_info.h" -#include "paddle/fluid/platform/place.h" namespace pten { -using CPUPlace = paddle::platform::CPUPlace; -using CUDAPlace = paddle::platform::CUDAPlace; -using CUDAPinnedPlace = paddle::platform::CUDAPinnedPlace; -using XPUPlace = paddle::platform::XPUPlace; -using NPUPlace = paddle::platform::NPUPlace; -using NPUPinnedPlace = paddle::platform::NPUPinnedPlace; +DenseTensor::DenseTensor(const std::shared_ptr& a, + const DenseTensorMeta& meta) + : meta_(meta), + storage_( + make_intrusive(a, SizeOf(data_type()) * numel())) {} -const paddle::platform::Place& DenseTensor::place() const { - PADDLE_ENFORCE_NOT_NULL( - allocation_, - paddle::platform::errors::PreconditionNotMet( - "Tensor not initialized yet when Tensor::place() is called.")); - return allocation_->place(); -} +DenseTensor::DenseTensor(const std::shared_ptr& a, + DenseTensorMeta&& meta) + : meta_(std::move(meta)), + storage_( + make_intrusive(a, SizeOf(data_type()) * numel())) {} + +DenseTensor::DenseTensor(intrusive_ptr storage, + const DenseTensorMeta& meta) + : meta_(meta), storage_(std::move(storage)) {} -//---------------------------------------------------------------- -// Inner methods +DenseTensor::DenseTensor(intrusive_ptr storage, DenseTensorMeta&& meta) + : meta_(std::move(meta)), storage_(std::move(storage)) {} -void DenseTensor::ShareAllocation( - const std::shared_ptr& allocation) { - // This operation can be very slow! - // std::shared_ptr reference count is atomic. increasing or decreasing - // the reference count requires atomic increment or decrement. 
- // This is hundred times slower than non-atomic increment/decrement - allocation_ = allocation; +int64_t DenseTensor::numel() const { + if (meta_.is_scalar) { + return 1; + } + return product(meta_.dims); +} + +bool DenseTensor::IsSharedWith(const DenseTensor& b) const { + return storage_.get() == b.storage_.get() && storage_.get() != nullptr; } -// TODO(chenweihang): Add other place branchs -paddle::platform::Place DenseTensor::GetPlaceByBackend() const { - switch (meta_.backend) { - case Backend::CPU: - return CPUPlace(); -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - case Backend::CUDA: - return CUDAPlace(paddle::platform::GetCurrentDeviceId()); -#endif - default: - PADDLE_THROW(paddle::platform::errors::Unimplemented( - "Unsupported Tensor backend.")); +void* DenseTensor::mutable_data(size_t request_bytes) { + PADDLE_ENFORCE( + valid(), + paddle::platform::errors::PreconditionNotMet( + "The meta data must be valid when call the mutable data function.")); + PADDLE_ENFORCE_NOT_NULL( + storage_, + paddle::platform::errors::PreconditionNotMet( + "The storage must be valid when call the mutable data function.")); + size_t bytes = numel() * SizeOf(data_type()); + if (request_bytes) { + PADDLE_ENFORCE_GE(request_bytes, + bytes, + paddle::platform::errors::InvalidArgument( + "The reserved size %d should be enough to meet the " + "volume required by metadata %d.", + request_bytes, + bytes)); + bytes = request_bytes; + } + if (storage_->size() < bytes) { + storage_->Realloc(bytes); } + return storage_->data(); } -size_t DenseTensor::MemorySize() const { - return allocation_ == nullptr ? 0UL : allocation_->size() - meta_.offset; +template +T* DenseTensor::mutable_data() { + PADDLE_ENFORCE( + (data_type() == paddle::experimental::CppTypeToDataType::Type()), + paddle::platform::errors::PreconditionNotMet( + "The type of data (%d) we are trying to retrieve does not match the " + "type of data currently contained in the container (%d).", + static_cast(paddle::experimental::CppTypeToDataType::Type()), + static_cast(data_type()))); + return static_cast(mutable_data()); } -void DenseTensor::CheckMemorySize() const { - PADDLE_ENFORCE_NOT_NULL(allocation_, - paddle::platform::errors::PreconditionNotMet( - "Tensor holds no memory. " - "Call Tensor::mutable_data firstly.")); - size_t size_of_type = - paddle::framework::SizeOfType(TransToProtoVarType(meta_.type)); - PADDLE_ENFORCE_LE( - numel() * size_of_type, - MemorySize(), +template +const T* DenseTensor::data() const { + PADDLE_ENFORCE( + (data_type() == paddle::experimental::CppTypeToDataType::Type()), paddle::platform::errors::PreconditionNotMet( - "Tensor's dimension is out of bound." - "Tensor's dimension must be equal or less than the size of its " - "memory." - "But received Tensor's dimension is d%, memory's size is %d.", - numel() * size_of_type, - MemorySize())); + "The type of data we are trying to retrieve does not match the " + "type of data currently contained in the container.")); + return static_cast(data()); } const void* DenseTensor::data() const { - CheckMemorySize(); - return reinterpret_cast( - reinterpret_cast(allocation_->ptr()) + meta_.offset); + PADDLE_ENFORCE_NOT_NULL( + storage_, + paddle::platform::errors::PreconditionNotMet( + "The storage must be valid when call the mutable data function.")); + return storage_->data(); } -void* DenseTensor::mutable_data() { - PADDLE_ENFORCE_GE( - numel(), - 0, - paddle::platform::errors::PreconditionNotMet( - "The Tensor's element number must be equal or greater than zero. 
" - "The Tensor's shape is [", - dims(), - "] now")); - size_t size = - numel() * paddle::framework::SizeOfType(TransToProtoVarType(meta_.type)); - auto place = GetPlaceByBackend(); - if (allocation_ == nullptr) { - allocation_.reset(); - allocation_ = paddle::memory::AllocShared(place, size); - } else { - if (!(allocation_->place() == place) || - allocation_->size() < size + meta_.offset) { - allocation_.reset(); - allocation_ = paddle::memory::AllocShared(place, size); - } else { - // do nothing - } - } - return reinterpret_cast( - reinterpret_cast(allocation_->ptr()) + meta_.offset); +void DenseTensor::check_memory_size() const { + size_t bytes = numel() * SizeOf(data_type()); + PADDLE_ENFORCE_GE(memory_size(), + bytes, + paddle::platform::errors::InvalidArgument( + "The memory size %d should be enough to meet the " + "volume required by metadata %d.", + memory_size(), + bytes)); } +#define DATA_MEMBER_FUNC_INSTANTIATION(dtype) \ + template dtype* DenseTensor::mutable_data(); \ + template const dtype* DenseTensor::data() const; + +DATA_MEMBER_FUNC_INSTANTIATION(bool); +DATA_MEMBER_FUNC_INSTANTIATION(int8_t); +DATA_MEMBER_FUNC_INSTANTIATION(uint8_t); +DATA_MEMBER_FUNC_INSTANTIATION(int16_t); +DATA_MEMBER_FUNC_INSTANTIATION(uint16_t); +DATA_MEMBER_FUNC_INSTANTIATION(int32_t); +DATA_MEMBER_FUNC_INSTANTIATION(uint32_t); +DATA_MEMBER_FUNC_INSTANTIATION(int64_t); +DATA_MEMBER_FUNC_INSTANTIATION(uint64_t); +DATA_MEMBER_FUNC_INSTANTIATION(::paddle::platform::bfloat16); +DATA_MEMBER_FUNC_INSTANTIATION(::paddle::platform::float16); +DATA_MEMBER_FUNC_INSTANTIATION(float); +DATA_MEMBER_FUNC_INSTANTIATION(double); +DATA_MEMBER_FUNC_INSTANTIATION(::paddle::experimental::complex64); +DATA_MEMBER_FUNC_INSTANTIATION(::paddle::experimental::complex128); + +#undef DATA_MEMBER_FUNC_INSTANTIATION + } // namespace pten diff --git a/paddle/pten/core/dense_tensor.h b/paddle/pten/core/dense_tensor.h index e913440a7e663..46932ecac2ad0 100644 --- a/paddle/pten/core/dense_tensor.h +++ b/paddle/pten/core/dense_tensor.h @@ -14,137 +14,159 @@ limitations under the License. */ #pragma once -#include - +#include "paddle/pten/core/allocator.h" +#include "paddle/pten/core/storage.h" #include "paddle/pten/core/tensor_base.h" #include "paddle/pten/core/tensor_meta.h" -#include "paddle/pten/core/tensor_status.h" - -namespace paddle { -namespace memory { -namespace allocation { -class Allocation; -} -} -} namespace pten { -using DataType = paddle::experimental::DataType; - -/** - * The implementation of general Tensor (For CPU, CUDA, HIP, etc.), similar - * to the Tensor in fluid, contains a pointer to Allocation and a series of - * descriptive metadata and status required by Tensor. - * - * DenseTensor is still a base class, it may have inherited classes. - * - * The memory layout of these inherited classes is consistent with the - * basic DenseTensor, except that a small number of members are added to - * further specialize the description of the tensor. - * - * If the memory layout is different, it cannot be described based on the - * general Allocation, and it needs to be directly inherited from - * TensorBase. - */ -class DenseTensor : public TensorBase { +/// \brief The Dense tensor store values in a contiguous sequential block +/// of memory where all values are represented. Tensors or multi-dimensional +/// arrays are used in math operators. +/// During the entire life cycle of a DenseTensor, its device type and key +/// metadata are set unchanged. 
+class DenseTensor : public TensorBase, + public TypeInfoTraits { public: - // Not allowed to initialize a tensor without descriptive metadata - DenseTensor() = delete; - - // DenseTensor(const DenseTensor&) = delete; - // DenseTensor& operator=(const DenseTensor&) = delete; - DenseTensor(DenseTensor&&) = delete; - DenseTensor& operator=(DenseTensor&&) = delete; - - /** - * If we still malloc memory by mutable_data, - * the DenseTensor doesn't need complicated constructor. - * - * Note: Tensor objects lacking meta information are not allowed to exist. - */ - DenseTensor(const TensorMeta& meta, const TensorStatus& status) - : meta_(meta), status_(status) {} - - DenseTensor(TensorMeta&& meta, TensorStatus&& status) - : meta_(std::move(meta)), status_(std::move(status)) {} - - int64_t numel() const override { return meta_.numel; } - - const paddle::framework::DDim& dims() const override { return meta_.dims; } - - DataType data_type() const override { return meta_.type; } + /// \brief Construct a dense tensor and allocate space. + /// \param a The allocator used to allocate space. + /// \param meta The meta data of dense tensor. + DenseTensor(const std::shared_ptr& a, const DenseTensorMeta& meta); + + /// \brief Construct a dense tensor and allocate space. + /// \param a The allocator used to allocate space. + /// \param meta The meta data of dense tensor. + DenseTensor(const std::shared_ptr& a, DenseTensorMeta&& meta); + + /// \brief Use existing storage space to create dense tensor. This interface + /// can be used to deliberately create an uninitialized dense tensor. + /// \param storage The existing storage. + /// \param meta The meta data of dense tensor. + DenseTensor(intrusive_ptr storage, const DenseTensorMeta& meta); + + /// \brief Use existing storage space to create dense tensor. This interface + /// can be used to deliberately create an uninitialized dense tensor. + /// \param storage The existing storage. + /// \param meta The meta data of dense tensor. + DenseTensor(intrusive_ptr storage, DenseTensorMeta&& meta); + + /// \brief Because dense tensor is a kind of container, we give a default + /// constructor to use for stl container. But the dense tensor created with + /// the default constructor is not practical. + DenseTensor() = default; + + /// \brief Because dense tensor is a resource handle, we provide a default + /// move constructor to support move semantics. + DenseTensor(DenseTensor&& other) = default; + + /// \brief We do not recommend deep copy of dense tensor because of its + /// efficiency and complexity across devices. The operation is disabled here. + DenseTensor(const DenseTensor& other) = delete; + + /// \brief Destroy the tensor object and release exclusive resources. + virtual ~DenseTensor() = default; - DataLayout layout() const override { return meta_.layout; } - - const paddle::platform::Place& place() const override; - - Backend backend() const override { return meta_.backend; } - - bool valid() const override { return allocation_ != nullptr; } - - bool initialized() const override { return allocation_ != nullptr; } - - /* member methods */ - - const std::shared_ptr& allocation() - const { - return allocation_; + public: + /// \brief Returns the name of the class for type traits. + /// \return The name of the class. + static const char* name() { return "DenseTensor"; } + + /// \brief Returns the number of elements contained in tensor. + /// \return The number of elements contained in tensor. 
+ int64_t numel() const; + + /// \brief Returns the dims of the tensor. + /// \return The dims of the tensor. + const DDim& dims() const noexcept { return meta_.dims; } + + /// \brief Returns the lod of the tensor. + /// \return The lod of the tensor. + const std::vector>& lod() const noexcept { + return meta_.lod; } - const TensorMeta& meta() const { return meta_; } - - TensorMeta* mutable_meta() { return &meta_; } - - /* Data Access Methods */ - - const void* data() const; - - void* mutable_data(); - + /// \brief Set the lod of the tensor. + void set_lod(const std::vector>& lod) { meta_.lod = lod; } + + /// \brief Returns the data type of the tensor. + /// \return The data type of the tensor. + DataType data_type() const noexcept { return meta_.type; } + + /// \brief Returns the data layout of the tensor. + /// \return The data layout of the tensor. + DataLayout layout() const noexcept { return meta_.layout; } + + /// \brief Returns the data place of the tensor. + /// \return The data place of the tensor. + const Place& place() const { return storage_->place(); } + + /// \brief Returns the meta information of the tensor. + /// \return The meta information of the tensor. + const DenseTensorMeta& meta() const noexcept { return meta_; } + + /// \brief Test whether the metadata is valid. + /// \return Whether the metadata is valid. + bool valid() const noexcept { return meta_.valid(); } + + /// \brief Test whether the storage is allocated. + /// return Whether the storage is allocated. + bool initialized() const { return storage_->data(); } + + /// \brief Check if storage is shared with other objects. + /// \return Whether the storage is shared with other objects. + bool IsSharedWith(const DenseTensor& b) const; + + /// \brief Change the dims information in the metadata, and the corresponding + /// memory allocation will occur when the `mutable_data` is called. + /// \param dims The new dims of the dense tensor. + void Resize(const DDim& dims) noexcept { meta_.dims = dims; } + + /// \brief Returns the actual storage size occupied by tensor, may be larger + /// than its shape dims. + /// \return The actual storage size occupied by tensor. + size_t memory_size() const { return storage_->size(); } + + /// \brief Check that the storage area is large enough to hold the data of the + /// metadata size, and throw an exception if the conditions are not met. + void check_memory_size() const; + + /// \brief Release the storage area for other purposes. Because of the + /// destruction of encapsulation, we do not support two dense tensors directly + /// sharing the same intrusive pointer. + /// \return The rvalue of instrusize pointer releated to the released storage. + intrusive_ptr release() { return std::move(storage_); } + + /// \brief Get the mutable data pointer value of type T. + /// Memory allocation may occur when calling this interface: + /// 1. When the storage size is not enough to meet the current shape of the + /// data. + /// \return The mutable data pointer value of type T. template - const T* data() const { - static_assert(std::is_pod::value || std::is_same::value, - "T must be POD when call Tensor.data()."); - return reinterpret_cast(data()); - } - - // NOTE: mutable_data does not hold arguments. Before calling mutable_data, - // please make sure that Tensor has maintained - // the correct meta and status. - // - // TODO(chenweihang): We need to be able to specify the allocator when - // mutable_data, or directly remove the mutable_data method. 
- // DenseTensor cannot actively apply for memory. Its memory application is - // handled by the DeviceContext->AllocateTensorData interface. - // I prefer the latter + T* mutable_data(); + + /// \brief Get the mutable data pointer value of raw type. + /// Memory allocation may occur when calling this interface: + /// 1. When the storage size is not enough to meet the current shape of the + /// data. + /// 2. When more request_bytes parameters are used to reserve the data + /// storage. + /// param request_bytes The bytes to reserve the data storage. + /// \return The mutable data pointer value of type T. + void* mutable_data(size_t request_bytes = 0); + + /// \brief Get the const data pointer value of type T. + /// \return The const data pointer value of type T. template - T* mutable_data() { - static_assert(std::is_pod::value, - "T must be POD when call Tensor.mutable_data()."); - return reinterpret_cast(mutable_data()); - } - - // For non-API and non-member interfaces, we still follow the C++ code style? - - void Resize(const DDim& dims) { meta_.dims = dims; } - - void ShareAllocation(const std::shared_ptr< - paddle::memory::allocation::Allocation>& allocation); + const T* data() const; - paddle::platform::Place GetPlaceByBackend() const; - - size_t MemorySize() const; - - void CheckMemorySize() const; + /// \brief Get the const data pointer value of raw type. + /// \return The const data pointer value of raw type. + const void* data() const; private: - // The actual Tensor storage holder - std::shared_ptr allocation_; - // The Tensor meta data - TensorMeta meta_; - // The Tensor status data - TensorStatus status_; + DenseTensorMeta meta_; + intrusive_ptr storage_; }; } // namespace pten diff --git a/paddle/pten/core/tensor_base.h b/paddle/pten/core/tensor_base.h index 74cc082646fe2..79fd742aea10b 100644 --- a/paddle/pten/core/tensor_base.h +++ b/paddle/pten/core/tensor_base.h @@ -61,8 +61,6 @@ class TensorBase { /// return Whether the storage is allocated. virtual bool initialized() const = 0; - virtual paddle::experimental::Backend backend() const { return {}; } - /// \brief Return the type information of the derived class to support /// safely downcast in non-rtti environment. /// return The type information of the derived class. diff --git a/paddle/pten/core/tensor_meta.h b/paddle/pten/core/tensor_meta.h index 8783ee584faf6..b4452a644f152 100644 --- a/paddle/pten/core/tensor_meta.h +++ b/paddle/pten/core/tensor_meta.h @@ -28,114 +28,58 @@ limitations under the License. */ namespace pten { -// template -// using Vector = paddle::framework::Vector; - -/* - * LoD is short for Level of Details. - * - * - in a level, each element indicates relative offset of the lower level - * - the first element should be 0 and that indicates that this sequence start - * from 0 - * - each sequence's begin and end(no-inclusive) is level[id, id+1] - * - * For example: - * 3-level LoD stores - * - * 0 2 3 - * 0 2 4 7 - * 0 2 5 7 10 12 15 20 - */ -// using LoD = std::vector>; -using LoD = std::vector>; using DDim = paddle::framework::DDim; -/** - * The Meta data member of DenseTensor. - * - * Here the `meta` represents information describing the basic features and - * data features of Tensor, and does not include the status information of - * Tensor - * - * Note: TensorMeta is a struct, the members are named like - * ordinary nonmember variables, such as `type` instead of `type_`. 
- * And we direct access its members, in addition to constructor, destructor - * and functions for setting data members, can not provide other functions. - */ -struct TensorMeta { - TensorMeta() = delete; - TensorMeta& operator=(const TensorMeta&) = delete; - TensorMeta& operator=(TensorMeta&&) = delete; - - TensorMeta(const TensorMeta&) = default; - // TensorMeta(TensorMeta&&) = default; - - TensorMeta(TensorMeta&& meta) - : dims(meta.dims), - backend(meta.backend), - type(meta.type), - layout(meta.layout), - numel(meta.numel), - offset(meta.offset), - lod(meta.lod) {} - - // Compatible Contructor - TensorMeta(const DDim& dims, - Backend backend, - DataType type, - DataLayout layout, - size_t offset = 0UL, - const LoD& lod = {}) - : dims(dims), - backend(backend), - type(type), - layout(layout), - offset(offset), - lod(lod) { - int64_t init_numel = paddle::framework::product(dims); - if (init_numel >= 0) { - numel = init_numel; - } - } - - virtual ~TensorMeta() = default; +using LoD = std::vector>; +/// \brief The meta data of dense tensor. Take the structure type +/// and use all default operations. +/// +struct DenseTensorMeta { + using DataType = paddle::experimental::DataType; + using DataLayout = paddle::experimental::DataLayout; + + DenseTensorMeta() = default; + DenseTensorMeta(DataType type, const DDim& dims); + DenseTensorMeta(DataType type, const DDim& dims, DataLayout layout); + DenseTensorMeta(DataType type, + const DDim& dims, + DataLayout layout, + const std::vector>& lod); + + /// \brief Test whether the metadata is valid. Does not throw exceptions. + /// \return Whether the metadata is valid. + bool valid() const noexcept; + + /// During the entire life cycle of a DenseTensor, the following attributes + /// marked with `const` are expected to remain unchanged. + const bool is_scalar{false}; DDim dims; - - Backend backend{Backend::CPU}; - DataType type{DataType::FLOAT32}; - DataLayout layout{DataLayout::NCHW}; - - /** - * [ Why not calculate numel based on dims? ] - * - * Tensor may be 0-dimensional, but 0-dimensional Tensor may have values. - * For example: - * - * import paddle - * - * a = paddle.to_tensor([1, 2, 3]) - * print(a[0].shape) # expected: [] - * print(a[0].numel()) # expected: 1 - * - * Now Paddle can not get expected result above, because the old Tensor's - * numel is calculated based on dims. - */ - int64_t numel{1}; - - size_t offset{0}; - - /** - * [ Why basic TensorMeta hold LoD? ] - * - * LoDTensor is still the main Tensor concept in Paddle. - * Although only a small number of ops need to use LoD information, - * LoD may need to be passed between Op's input and output, which is - * difficult to remove in a short time. - * - * But we don't want to add a Tensor type because of LoD, which makes - * the concept complicated, so LoD is a member held by Tensor by default. 
- */ + const DataType type{DataType::FLOAT32}; + const DataLayout layout{DataLayout::NCHW}; LoD lod; }; +inline DenseTensorMeta::DenseTensorMeta(DataType type, const DDim& dims) + : dims(dims), type(type) {} + +inline DenseTensorMeta::DenseTensorMeta(DataType type, + const DDim& dims, + DataLayout layout) + : dims(dims), type(type), layout(layout) {} + +inline DenseTensorMeta::DenseTensorMeta( + DataType type, + const DDim& dims, + DataLayout layout, + const std::vector>& lod) + : dims(dims), type(type), layout(layout), lod(lod) {} + +inline bool DenseTensorMeta::valid() const noexcept { + bool valid{true}; + valid = valid && (type != DataType::UNDEFINED); + valid = valid && (layout != DataLayout::UNDEFINED); + valid = valid && (is_scalar || product(dims)); + return valid; +} + } // namespace pten diff --git a/paddle/pten/hapi/CMakeLists.txt b/paddle/pten/hapi/CMakeLists.txt index 8a33de85bddd3..4b427b3b4a383 100644 --- a/paddle/pten/hapi/CMakeLists.txt +++ b/paddle/pten/hapi/CMakeLists.txt @@ -1,3 +1,3 @@ add_subdirectory(lib) -cc_library(pten_hapi SRCS all.cc DEPS math_api linalg_api creation_api) +cc_library(pten_hapi SRCS all.cc DEPS linalg_api math_api creation_api) diff --git a/paddle/pten/hapi/lib/creation.cc b/paddle/pten/hapi/lib/creation.cc index 5048b983b122f..cda8d24b5e6ad 100644 --- a/paddle/pten/hapi/lib/creation.cc +++ b/paddle/pten/hapi/lib/creation.cc @@ -21,6 +21,7 @@ limitations under the License. */ #include "paddle/pten/api/include/core.h" #include "paddle/pten/api/include/infershape.h" #include "paddle/pten/hapi/lib/kernel_dispatch.h" +#include "paddle/pten/hapi/lib/utils/allocator.h" namespace paddle { namespace experimental { @@ -50,10 +51,12 @@ Tensor full_like(const Tensor& x, Tensor out; // InferDataType if (dtype != pten::DataType::UNDEFINED) { - out_meta.type = dtype; + const_cast(out_meta.type) = dtype; } - auto dense_out = - std::make_shared(out_meta, pten::TensorStatus()); + const auto allocator = + std::make_shared( + pten::TransToFluidPlace(kernel_key.backend())); + auto dense_out = std::make_shared(allocator, out_meta); kernel_context.EmplaceBackOutput(dense_out); out.set_impl(dense_out); diff --git a/paddle/pten/hapi/lib/linalg.cc b/paddle/pten/hapi/lib/linalg.cc index 1269702f28f91..54829feb43a24 100644 --- a/paddle/pten/hapi/lib/linalg.cc +++ b/paddle/pten/hapi/lib/linalg.cc @@ -24,6 +24,7 @@ limitations under the License. */ #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/kernel_context.h" #include "paddle/pten/hapi/lib/kernel_dispatch.h" +#include "paddle/pten/hapi/lib/utils/allocator.h" #include "paddle/pten/infershape/binary.h" namespace paddle { @@ -52,8 +53,9 @@ Tensor dot(const Tensor& x, const Tensor& y) { // 5. Prepare outputs Tensor out; - auto dense_out = - std::make_shared(out_meta, pten::TensorStatus()); + const auto allocator = std::make_shared( + pten::TransToFluidPlace(kernel_key.backend())); + auto dense_out = std::make_shared(allocator, out_meta); kernel_context.EmplaceBackOutput(dense_out); out.set_impl(dense_out); diff --git a/paddle/pten/hapi/lib/manipulation.cc b/paddle/pten/hapi/lib/manipulation.cc index 4b9b66b9df0bd..fa60bac6d1aed 100644 --- a/paddle/pten/hapi/lib/manipulation.cc +++ b/paddle/pten/hapi/lib/manipulation.cc @@ -19,6 +19,7 @@ limitations under the License. 
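For orientation, a minimal sketch of the construction pattern the call sites above switch to: a pten::DenseTensor is built from an allocator plus a DenseTensorMeta instead of the old TensorMeta/TensorStatus pair. The allocator type name (paddle::experimental::DefaultAllocator) is an assumption taken from hapi/lib/utils/allocator.h, since the template arguments are elided in the hunks above; everything else mirrors the tests at the end of this patch.

    #include <memory>

    #include "glog/logging.h"
    #include "paddle/pten/core/dense_tensor.h"
    #include "paddle/pten/hapi/lib/utils/allocator.h"

    void DenseTensorConstructionSketch() {
      // Allocator bound to a concrete place (CPU here, purely as an example).
      const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
          paddle::platform::CPUPlace());

      // Meta carries dtype, dims and layout; lod is optional and empty by default.
      pten::DenseTensorMeta meta(pten::DataType::FLOAT32,
                                 paddle::framework::make_ddim({3, 4}),
                                 pten::DataLayout::NCHW);

      pten::DenseTensor dense(alloc, meta);
      CHECK(dense.valid());                       // dtype and layout defined, dims consistent
      float* data = dense.mutable_data<float>();  // storage is allocated lazily here
      data[0] = 1.0f;
    }
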
*/ #include "glog/logging.h" #include "paddle/pten/api/include/core.h" #include "paddle/pten/hapi/lib/kernel_dispatch.h" +#include "paddle/pten/hapi/lib/utils/allocator.h" #include "paddle/pten/infershape/unary.h" namespace paddle { @@ -46,8 +47,9 @@ Tensor flatten(const Tensor& x, int start_axis, int stop_axis) { // 5. Prepare outputs Tensor out; - auto dense_out = - std::make_shared(out_meta, pten::TensorStatus()); + const auto allocator = std::make_shared( + pten::TransToFluidPlace(kernel_key.backend())); + auto dense_out = std::make_shared(allocator, out_meta); kernel_context.EmplaceBackOutput(dense_out); out.set_impl(dense_out); diff --git a/paddle/pten/hapi/lib/math.cc b/paddle/pten/hapi/lib/math.cc index 851a9bc155cdd..5e4e96d333030 100644 --- a/paddle/pten/hapi/lib/math.cc +++ b/paddle/pten/hapi/lib/math.cc @@ -21,6 +21,7 @@ limitations under the License. */ #include "paddle/pten/api/include/core.h" #include "paddle/pten/api/include/infershape.h" #include "paddle/pten/hapi/lib/kernel_dispatch.h" +#include "paddle/pten/hapi/lib/utils/allocator.h" #include "paddle/pten/infershape/unary.h" namespace paddle { @@ -46,8 +47,10 @@ Tensor mean(const Tensor& x) { // 5. Prepare outputs Tensor out; - auto dense_out = - std::make_shared(out_meta, pten::TensorStatus()); + const auto allocator = + std::make_shared( + pten::TransToFluidPlace(kernel_key.backend())); + auto dense_out = std::make_shared(allocator, out_meta); kernel_context.EmplaceBackOutput(dense_out); out.set_impl(dense_out); diff --git a/paddle/pten/hapi/lib/utils/CMakeLists.txt b/paddle/pten/hapi/lib/utils/CMakeLists.txt index 4ab33a10dcdc4..c89ef812846ad 100644 --- a/paddle/pten/hapi/lib/utils/CMakeLists.txt +++ b/paddle/pten/hapi/lib/utils/CMakeLists.txt @@ -1,3 +1,4 @@ add_subdirectory(tests) -cc_library(pten_hapi_utils SRCS allocator.cc storage tensor_utils DEPS tensor_base pten_dense_tensor pten_utils) +cc_library(pten_hapi_utils SRCS allocator.cc storage.cc tensor_utils.cc DEPS tensor_base convert_utils +dense_tensor lod_tensor selected_rows place var_type_traits) diff --git a/paddle/pten/hapi/lib/utils/tensor_utils.cc b/paddle/pten/hapi/lib/utils/tensor_utils.cc index be7feebe8c206..2fb39852702c2 100644 --- a/paddle/pten/hapi/lib/utils/tensor_utils.cc +++ b/paddle/pten/hapi/lib/utils/tensor_utils.cc @@ -15,5 +15,113 @@ limitations under the License. 
*/ #include "paddle/pten/hapi/lib/utils/tensor_utils.h" namespace paddle { -namespace experimental {} // namespace experimental +namespace experimental { + +template +void SetLoD(DstLoD* dst, const SrcLoD& src) { + dst->reserve(src.size()); + dst->clear(); + for (auto&& v : src) { + dst->emplace_back(v); + } +} + +std::unique_ptr MakePtenDenseTensor( + const paddle::framework::Tensor& src) { + pten::DenseTensorMeta meta{pten::TransToPtenDataType(src.type()), + src.dims(), + pten::TransToPtenDataLayout(src.layout())}; + auto shared_storage = pten::make_intrusive(src.Holder()); + return std::make_unique(std::move(shared_storage), + std::move(meta)); +} + +std::unique_ptr MakePtenDenseTensor( + const paddle::framework::LoDTensor& src) { + pten::DenseTensorMeta meta{pten::TransToPtenDataType(src.type()), + src.dims(), + pten::TransToPtenDataLayout(src.layout())}; + SetLoD(&meta.lod, src.lod()); + auto shared_storage = pten::make_intrusive(src.Holder()); + return std::make_unique(std::move(shared_storage), + std::move(meta)); +} + +std::unique_ptr MakePtenTensorBaseFromVar( + const framework::Variable& variable, const pten::TensorArgDef& arg_def) { + auto expected_place = pten::TransToFluidPlace(arg_def.backend); + + if (variable.IsType()) { + const auto& tensor = variable.Get(); + if (!platform::is_same_place(tensor.place(), expected_place)) { + framework::LoDTensor tmp_tensor; + framework::TensorCopySync(tensor, expected_place, &tmp_tensor); + return MakePtenDenseTensor(tmp_tensor); + } else { + return MakePtenDenseTensor(tensor); + } + } else if (variable.IsType()) { + // TODO(chenweihang): now we don't deal with row and height + // by xiaowei's advice + const auto& tensor = variable.Get(); + if (!platform::is_same_place(tensor.value().place(), expected_place)) { + framework::Tensor tmp_tensor; + TensorCopySync(tensor.value(), expected_place, &tmp_tensor); + // TODO(chenweihang): adapt SelectedRows by xiaowei's design + return MakePtenDenseTensor(tmp_tensor); + } else { + return MakePtenDenseTensor(tensor.value()); + } + } else { + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported shared input `%s` type now when call pt kernel.", + framework::ToTypeName(variable.Type()))); + } + return {}; +} + +std::unique_ptr MakePtenTensorBaseFromVar( + framework::Variable* variable, const pten::TensorArgDef& arg_def) { + // mutable_data before run kernel, to avoid share output form + // KernelContext to original tensor + if (variable->template IsType()) { + auto* tensor = variable->template GetMutable(); + tensor->mutable_data(pten::TransToFluidPlace(arg_def.backend), + pten::TransToProtoVarType(arg_def.dtype)); + return MakePtenDenseTensor(*tensor); + } else if (variable->template IsType()) { + auto* tensor = variable->template GetMutable(); + tensor->mutable_value()->mutable_data( + pten::TransToFluidPlace(arg_def.backend), + pten::TransToProtoVarType(arg_def.dtype)); + // TODO(chenweihang): adapt SelectedRows by xiaowei's design, + // here the row and height will lost in output! 
+ return MakePtenDenseTensor(tensor->value()); + } else { + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported shared output `%s` type now when call pt kernel.", + framework::ToTypeName(variable->Type()))); + } + return {}; +} + +void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) { + CHECK(src); + CHECK(dst); + dst->Resize(src->dims()); + auto storage = src->release(); + CHECK(storage->OwnsMemory()); + std::shared_ptr holder( + new TensorStorage(std::move(storage))); + dst->ResetHolderWithType(holder, pten::TransToProtoVarType(src->data_type())); +} + +void MovesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst) { + CHECK(src); + CHECK(dst); + SetLoD(dst->mutable_lod(), src->lod()); + MovesStorage(src, static_cast(dst)); +} + +} // namespace experimental } // namespace paddle diff --git a/paddle/pten/hapi/lib/utils/tensor_utils.h b/paddle/pten/hapi/lib/utils/tensor_utils.h index c9d2f8ca32963..a2b2688362a4c 100644 --- a/paddle/pten/hapi/lib/utils/tensor_utils.h +++ b/paddle/pten/hapi/lib/utils/tensor_utils.h @@ -17,64 +17,32 @@ limitations under the License. */ #include #include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/variable.h" -#include "paddle/pten/core/candidate/dense_tensor.h" #include "paddle/pten/core/convert_utils.h" +#include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/core/kernel_factory.h" #include "paddle/pten/hapi/lib/utils/allocator.h" #include "paddle/pten/hapi/lib/utils/storage.h" namespace paddle { namespace experimental { -using namespace pten::candidate; // NOLINT +std::unique_ptr MakePtenDenseTensor( + const paddle::framework::Tensor& src); -template -void SetLoD(DstLoD* dst, const SrcLoD& src) { - dst->reserve(src.size()); - dst->clear(); - for (auto&& v : src) { - dst->emplace_back(v); - } -} +std::unique_ptr MakePtenDenseTensor( + const paddle::framework::LoDTensor& src); -std::shared_ptr MakeSharedDenseTensor( - const paddle::framework::Tensor& src) { - DenseTensorMeta meta{pten::TransToPtenDataType(src.type()), - src.dims(), - pten::TransToPtenDataLayout(src.layout())}; - auto shared_storage = pten::make_intrusive(src.Holder()); - return std::make_shared(std::move(shared_storage), - std::move(meta)); -} +std::unique_ptr MakePtenTensorBaseFromVar( + const framework::Variable& variable, const pten::TensorArgDef& arg_def); -std::shared_ptr MakeSharedDenseTensor( - const paddle::framework::LoDTensor& src) { - DenseTensorMeta meta{pten::TransToPtenDataType(src.type()), - src.dims(), - pten::TransToPtenDataLayout(src.layout())}; - SetLoD(&meta.lod, src.lod()); - auto shared_storage = pten::make_intrusive(src.Holder()); - return std::make_shared(std::move(shared_storage), - std::move(meta)); -} +std::unique_ptr MakePtenTensorBaseFromVar( + framework::Variable* variable, const pten::TensorArgDef& arg_def); -void MovesStorage(DenseTensor* src, paddle::framework::Tensor* dst) { - CHECK(src); - CHECK(dst); - dst->Resize(src->dims()); - auto storage = src->release(); - CHECK(storage->OwnsMemory()); - std::shared_ptr holder( - new TensorStorage(std::move(storage))); - dst->ResetHolderWithType(holder, pten::TransToProtoVarType(src->data_type())); -} +void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst); -void MovesStorage(DenseTensor* src, paddle::framework::LoDTensor* dst) { - CHECK(src); - CHECK(dst); - SetLoD(dst->mutable_lod(), src->lod()); - MovesStorage(src, static_cast(dst)); -} +void MovesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst); } 
// namespace experimental } // namespace paddle diff --git a/paddle/pten/hapi/lib/utils/tests/test_tensor_utils.cc b/paddle/pten/hapi/lib/utils/tests/test_tensor_utils.cc index f45537508d29a..56184eec70f26 100644 --- a/paddle/pten/hapi/lib/utils/tests/test_tensor_utils.cc +++ b/paddle/pten/hapi/lib/utils/tests/test_tensor_utils.cc @@ -24,8 +24,8 @@ using DDim = paddle::framework::DDim; using DataType = paddle::experimental::DataType; using DataLayout = paddle::experimental::DataLayout; -using DenseTensor = pten::candidate::DenseTensor; -using DenseTensorMeta = pten::candidate::DenseTensorMeta; +using DenseTensor = pten::DenseTensor; +using DenseTensorMeta = pten::DenseTensorMeta; TEST(tensor_utils, dense_tensor_to_lod_tensor) { const DDim dims({2, 1}); @@ -56,7 +56,7 @@ TEST(tensor_utils, dense_tensor_to_lod_tensor) { CHECK(lod_tensor.data()[0] == 1.0f); CHECK(lod_tensor.data()[1] == 2.1f); - auto dense_tensor_1 = MakeSharedDenseTensor(lod_tensor); + auto dense_tensor_1 = MakePtenDenseTensor(lod_tensor); CHECK(dense_tensor_1->dims() == dims); CHECK(dense_tensor_1->data_type() == dtype); CHECK(dense_tensor_1->layout() == layout); @@ -90,7 +90,7 @@ TEST(tensor_utils, dense_tensor_to_tensor) { CHECK(tensor.data()[0] == 1.0f); CHECK(tensor.data()[1] == 2.1f); - auto dense_tensor_1 = MakeSharedDenseTensor(tensor); + auto dense_tensor_1 = MakePtenDenseTensor(tensor); CHECK(dense_tensor_1->dims() == dims); CHECK(dense_tensor_1->data_type() == dtype); CHECK(dense_tensor_1->layout() == layout); @@ -99,6 +99,27 @@ TEST(tensor_utils, dense_tensor_to_tensor) { CHECK(data_1[1] == 2.1f); } +TEST(PtenUtils, VarToPtTensor) { + // 1. create Variable + paddle::framework::Variable v; + auto selected_rows = v.GetMutable(); + paddle::framework::Tensor* value = selected_rows->mutable_value(); + auto* data = value->mutable_data(paddle::framework::make_ddim({1, 1}), + paddle::platform::CPUPlace()); + data[0] = 123; + pten::Backend expect_backend = pten::Backend::CPU; + +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + expect_backend = pten::Backend::CUDA; +#endif + auto tensor_def = pten::TensorArgDef( + expect_backend, pten::DataLayout::NCHW, pten::DataType::INT32); + // 2. test API + auto tensor_x = MakePtenTensorBaseFromVar(v, tensor_def); + // 3. check result + ASSERT_EQ(tensor_x->data_type(), pten::DataType::INT32); +} + } // namespace tests } // namespace experimental } // namespace paddle diff --git a/paddle/pten/infershape/binary.cc b/paddle/pten/infershape/binary.cc index 7d224835cc05a..c2b88c74d847e 100644 --- a/paddle/pten/infershape/binary.cc +++ b/paddle/pten/infershape/binary.cc @@ -17,7 +17,8 @@ limitations under the License. 
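Condensed from the test_tensor_utils cases above, the round trip between the fluid and pten tensor types is zero-copy in both directions: MovesStorage releases the DenseTensor's intrusive storage and re-wraps it as the LoDTensor's holder, while MakePtenDenseTensor wraps the existing holder again through the shared storage type from hapi/lib/utils/storage.h. A sketch, with the DefaultAllocator name assumed as noted earlier:

    #include <memory>

    #include "paddle/pten/hapi/lib/utils/tensor_utils.h"

    void RoundTripSketch() {
      const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
          paddle::platform::CPUPlace());
      pten::DenseTensorMeta meta(pten::DataType::FLOAT32,
                                 paddle::framework::make_ddim({2, 1}),
                                 pten::DataLayout::NCHW,
                                 /*lod=*/{{0, 2}});

      auto dense = std::make_unique<pten::DenseTensor>(alloc, meta);
      dense->mutable_data<float>()[0] = 1.0f;

      // DenseTensor -> LoDTensor: dims, lod and the storage holder move over, no copy.
      paddle::framework::LoDTensor lod_tensor;
      paddle::experimental::MovesStorage(dense.get(), &lod_tensor);

      // LoDTensor -> DenseTensor: shares the same holder, again without copying.
      auto dense_again = paddle::experimental::MakePtenDenseTensor(lod_tensor);
    }
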
*/ namespace pten { -TensorMeta DotInferShape(const TensorMeta& x_meta, const TensorMeta& y_meta) { +DenseTensorMeta DotInferShape(const DenseTensorMeta& x_meta, + const DenseTensorMeta& y_meta) { auto x_dims = x_meta.dims; auto x_rank = static_cast(x_dims.size()); PADDLE_ENFORCE_EQ(true, @@ -54,8 +55,7 @@ TensorMeta DotInferShape(const TensorMeta& x_meta, const TensorMeta& y_meta) { y_dims.to_str())); x_dims[x_dims.size() - 1] = 1; - TensorMeta return_meta( - x_dims, x_meta.backend, x_meta.type, x_meta.layout, x_meta.offset); + DenseTensorMeta return_meta(x_meta.type, x_dims, x_meta.layout); return return_meta; } diff --git a/paddle/pten/infershape/binary.h b/paddle/pten/infershape/binary.h index 8e44b520e0a9f..613d2f66a6edd 100644 --- a/paddle/pten/infershape/binary.h +++ b/paddle/pten/infershape/binary.h @@ -21,15 +21,19 @@ namespace pten { // Common InferShape Functions for binary operators, The format like: // -// 1. TensorMeta [OpName]InferShape(const TensorMeta& x_meta, ...) {} -// 2. std::pair [OpName]InferShape(const TensorMeta& +// 1. DenseTensorMeta [OpName]InferShape(const DenseTensorMeta& x_meta, ...) +// {} +// 2. std::pair [OpName]InferShape(const +// DenseTensorMeta& // x_meta, ...) {} -// 3. std::tuple [OpName]InferShape(const -// TensorMeta& x_meta, ...) +// 3. std::tuple +// [OpName]InferShape(const +// DenseTensorMeta& x_meta, ...) // NOTE: The name "InferShape" may be not appropriate. "InferMeta" may be good. // Because functions in this file // not only can infer shape, but alse need infer lod or other useful data. -TensorMeta DotInferShape(const TensorMeta& x_meta, const TensorMeta& y_meta); +DenseTensorMeta DotInferShape(const DenseTensorMeta& x_meta, + const DenseTensorMeta& y_meta); } // namespace pten diff --git a/paddle/pten/infershape/unary.cc b/paddle/pten/infershape/unary.cc index 57e74345b7d42..4e743261b5906 100644 --- a/paddle/pten/infershape/unary.cc +++ b/paddle/pten/infershape/unary.cc @@ -17,18 +17,19 @@ limitations under the License. 
*/ namespace pten { -TensorMeta UnchangedInferShape(const TensorMeta& x_meta) { return x_meta; } +DenseTensorMeta UnchangedInferShape(const DenseTensorMeta& x_meta) { + return x_meta; +} -TensorMeta ReductionInferShape(const TensorMeta& x_meta) { +DenseTensorMeta ReductionInferShape(const DenseTensorMeta& x_meta) { const auto& out_dims = paddle::framework::make_ddim({1}); - TensorMeta return_meta( - out_dims, x_meta.backend, x_meta.type, x_meta.layout, x_meta.offset); + DenseTensorMeta return_meta(x_meta.type, out_dims, x_meta.layout); return return_meta; } -TensorMeta FlattenInferShape(const TensorMeta& x_meta, - int start_axis, - int stop_axis) { +DenseTensorMeta FlattenInferShape(const DenseTensorMeta& x_meta, + int start_axis, + int stop_axis) { auto& x_dims = x_meta.dims; int in_dims_size = x_dims.size(); if (start_axis < 0) { @@ -62,8 +63,7 @@ TensorMeta FlattenInferShape(const TensorMeta& x_meta, out_shape.push_back(x_dims[i]); } const auto& out_dims = paddle::framework::make_ddim(out_shape); - TensorMeta return_meta( - out_dims, x_meta.backend, x_meta.type, x_meta.layout, x_meta.offset); + DenseTensorMeta return_meta(x_meta.type, out_dims, x_meta.layout); if (x_dims[0] == return_meta.dims[0]) { // Only pass LoD when the first dimension of output and Input(X) diff --git a/paddle/pten/infershape/unary.h b/paddle/pten/infershape/unary.h index 1d8fac05d0eaa..1db0b094eba3a 100644 --- a/paddle/pten/infershape/unary.h +++ b/paddle/pten/infershape/unary.h @@ -21,21 +21,24 @@ namespace pten { // Common InferShape Functions for unary operators, The format like: // -// 1. TensorMeta [OpName]InferShape(const TensorMeta& x_meta, ...) {} -// 2. std::pair [OpName]InferShape(const TensorMeta& +// 1. DenseTensorMeta [OpName]InferShape(const DenseTensorMeta& x_meta, ...) +// {} +// 2. std::pair [OpName]InferShape(const +// DenseTensorMeta& // x_meta, ...) {} -// 3. std::tuple [OpName]InferShape(const -// TensorMeta& x_meta, ...) +// 3. std::tuple +// [OpName]InferShape(const +// DenseTensorMeta& x_meta, ...) // NOTE: The name "InferShape" may be not appropriate. "InferMeta" may be good. // Because functions in this file // not only can infer shape, but alse need infer lod or other useful data. 
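Per the NOTE above, these helpers only map metas to metas; applying the result is left to the kernel. A short sketch of the intended kernel-side usage, mirroring pten::Flatten in kernels/cpu/manipulation.cc further below (the helper and variable names here are illustrative only):

    #include "paddle/pten/core/dense_tensor.h"
    #include "paddle/pten/infershape/unary.h"

    // Illustrative helper: derive the output meta and apply it to `out`.
    void ApplyFlattenMeta(const pten::DenseTensor& x,
                          int start_axis,
                          int stop_axis,
                          pten::DenseTensor* out) {
      pten::DenseTensorMeta out_meta =
          pten::FlattenInferShape(x.meta(), start_axis, stop_axis);
      out->Resize(out_meta.dims);   // only the meta changes; memory is allocated later
      out->set_lod(out_meta.lod);   // lod is forwarded when the first dimension is kept
    }
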
-TensorMeta UnchangedInferShape(const TensorMeta& x_meta); +DenseTensorMeta UnchangedInferShape(const DenseTensorMeta& x_meta); -TensorMeta ReductionInferShape(const TensorMeta& x_meta); +DenseTensorMeta ReductionInferShape(const DenseTensorMeta& x_meta); -TensorMeta FlattenInferShape(const TensorMeta& x_meta, - int start_axis, - int stop_axis); +DenseTensorMeta FlattenInferShape(const DenseTensorMeta& x_meta, + int start_axis, + int stop_axis); } // namespace pten diff --git a/paddle/pten/kernels/cpu/CMakeLists.txt b/paddle/pten/kernels/cpu/CMakeLists.txt index ad18a2f555265..2c4a424e48492 100644 --- a/paddle/pten/kernels/cpu/CMakeLists.txt +++ b/paddle/pten/kernels/cpu/CMakeLists.txt @@ -1,5 +1,5 @@ cc_library(math_cpu SRCS math.cc DEPS dense_tensor kernel_context kernel_factory eigen_function) cc_library(linalg_cpu SRCS linalg.cc DEPS dense_tensor kernel_context kernel_factory) cc_library(creation_cpu SRCS creation.cc DEPS dense_tensor kernel_context kernel_factory eigen_function) -cc_library(utils_cpu SRCS utils.cc DEPS dense_tensor kernel_context kernel_factory memory) +cc_library(utils_cpu SRCS utils.cc DEPS dense_tensor kernel_context kernel_factory memory convert_utils) cc_library(manipulation_cpu SRCS manipulation.cc DEPS dense_tensor kernel_context kernel_factory utils_cpu unary) diff --git a/paddle/pten/kernels/cpu/manipulation.cc b/paddle/pten/kernels/cpu/manipulation.cc index ac7a8eaba8cf5..c436e14e0caab 100644 --- a/paddle/pten/kernels/cpu/manipulation.cc +++ b/paddle/pten/kernels/cpu/manipulation.cc @@ -26,7 +26,7 @@ void Flatten(const CPUContext& dev_ctx, DenseTensor* out) { auto out_meta = FlattenInferShape(x.meta(), start_axis, stop_axis); pten::Copy(dev_ctx, x, out); - out->mutable_meta()->lod = out_meta.lod; + out->set_lod(out_meta.lod); out->Resize(out_meta.dims); } @@ -47,8 +47,8 @@ void FlattenWithXShape(const CPUContext& dev_ctx, for (int i = 0; i < in_dims.size(); ++i) { xshape_dims[i + 1] = in_dims[i]; } - xshape->mutable_meta()->dims = paddle::framework::make_ddim(xshape_dims); - xshape->mutable_meta()->lod = x.meta().lod; + xshape->Resize(paddle::framework::make_ddim(xshape_dims)); + xshape->set_lod(x.lod()); } } // namespace pten diff --git a/paddle/pten/kernels/cpu/utils.cc b/paddle/pten/kernels/cpu/utils.cc index b17b6512178d0..1f9d675deafa2 100644 --- a/paddle/pten/kernels/cpu/utils.cc +++ b/paddle/pten/kernels/cpu/utils.cc @@ -24,7 +24,6 @@ void Copy(const CPUContext& dev_ctx, const DenseTensor& src, DenseTensor* dst) { auto* dst_ptr = dst->mutable_data(); const auto& src_place = src.place(); const auto& dst_place = dst->place(); - src.CheckMemorySize(); if (src_ptr == dst_ptr && src_place == dst_place) { VLOG(3) << "Skip copy the same data async from " << src_place << " to " @@ -36,7 +35,7 @@ void Copy(const CPUContext& dev_ctx, const DenseTensor& src, DenseTensor* dst) { VLOG(3) << "TensorCopy " << src.dims() << " from " << src.place() << " to " << dst_place; dst->Resize(src.dims()); - dst->mutable_meta()->layout = src.meta().layout; + CHECK(dst->layout() == src.layout()); auto size = src.numel() * paddle::framework::SizeOfType( TransToProtoVarType(src.data_type())); diff --git a/paddle/pten/kernels/cuda/CMakeLists.txt b/paddle/pten/kernels/cuda/CMakeLists.txt index 54df37ecb5e26..9e86d9521c99a 100644 --- a/paddle/pten/kernels/cuda/CMakeLists.txt +++ b/paddle/pten/kernels/cuda/CMakeLists.txt @@ -2,12 +2,12 @@ if(WITH_GPU) nv_library(math_cuda SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory) nv_library(linalg_cuda 
SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory) nv_library(creation_cuda SRCS creation.cu DEPS eigen_function dense_tensor kernel_context kernel_factory) - nv_library(utils_cuda SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory) + nv_library(utils_cuda SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils) nv_library(manipulation_cuda SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_cuda unary) elseif(WITH_ROCM) hip_library(math_cuda SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory) hip_library(linalg_cuda SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory) hip_library(creation_cuda SRCS creation.cu DEPS eigen_function dense_tensor kernel_context kernel_factory) - hip_library(utils_cuda SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory) + hip_library(utils_cuda SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils) hip_library(manipulation_cuda SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_cuda unary) endif() diff --git a/paddle/pten/kernels/cuda/manipulation.cu b/paddle/pten/kernels/cuda/manipulation.cu index 13bc109faaba3..43614f859c58b 100644 --- a/paddle/pten/kernels/cuda/manipulation.cu +++ b/paddle/pten/kernels/cuda/manipulation.cu @@ -26,7 +26,7 @@ void Flatten(const CUDAContext& dev_ctx, DenseTensor* out) { auto out_meta = FlattenInferShape(x.meta(), start_axis, stop_axis); pten::Copy(dev_ctx, x, out); - out->mutable_meta()->lod = out_meta.lod; + out->set_lod(out_meta.lod); out->Resize(out_meta.dims); } @@ -47,8 +47,8 @@ void FlattenWithXShape(const CUDAContext& dev_ctx, for (int i = 0; i < in_dims.size(); ++i) { xshape_dims[i + 1] = in_dims[i]; } - xshape->mutable_meta()->dims = paddle::framework::make_ddim(xshape_dims); - xshape->mutable_meta()->lod = x.meta().lod; + xshape->Resize(paddle::framework::make_ddim(xshape_dims)); + xshape->set_lod(x.lod()); } } // namespace pten diff --git a/paddle/pten/kernels/cuda/math.cu b/paddle/pten/kernels/cuda/math.cu index 4ebe58629545e..1f2a34ea505c2 100644 --- a/paddle/pten/kernels/cuda/math.cu +++ b/paddle/pten/kernels/cuda/math.cu @@ -30,6 +30,7 @@ namespace cub = hipcub; #include "paddle/fluid/platform/float16.h" #include "paddle/pten/core/convert_utils.h" #include "paddle/pten/core/kernel_registry.h" +#include "paddle/pten/hapi/lib/utils/tensor_utils.h" namespace pten { @@ -75,16 +76,21 @@ void Mean(const CUDAContext& dev_ctx, const DenseTensor& x, DenseTensor* out) { nullptr, temp_storage_bytes, trans_x, out_data, size_prob, stream); PADDLE_ENFORCE_CUDA_SUCCESS(err); + const auto alloc = std::make_shared( + dev_ctx.GetPlace()); pten::DenseTensor tmp( - TensorMeta(paddle::framework::make_ddim( - {static_cast(temp_storage_bytes)}), - pten::TransToPtenBackend(dev_ctx.GetPlace()), - x.data_type(), - x.layout()), - TensorStatus()); - auto* temp_storage = tmp.mutable_data(); - err = cub::DeviceReduce::Sum( - temp_storage, temp_storage_bytes, trans_x, out_data, size_prob, stream); + alloc, + DenseTensorMeta(x.data_type(), + paddle::framework::make_ddim( + {static_cast(temp_storage_bytes)}), + x.layout())); + void* temp_storage = tmp.mutable_data(); + err = cub::DeviceReduce::Sum(static_cast(temp_storage), + temp_storage_bytes, + trans_x, + out_data, + size_prob, + stream); PADDLE_ENFORCE_CUDA_SUCCESS(err); } diff --git a/paddle/pten/kernels/cuda/utils.cu b/paddle/pten/kernels/cuda/utils.cu index 
74e070880e106..e81e00a5873f7 100644 --- a/paddle/pten/kernels/cuda/utils.cu +++ b/paddle/pten/kernels/cuda/utils.cu @@ -27,7 +27,6 @@ void Copy(const CUDAContext& dev_ctx, auto* dst_ptr = dst->mutable_data(); const auto& src_place = src.place(); const auto& dst_place = dst->place(); - src.CheckMemorySize(); if (src_ptr == dst_ptr && src_place == dst_place) { VLOG(3) << "Skip copy the same data async from " << src_place << " to " @@ -39,7 +38,7 @@ void Copy(const CUDAContext& dev_ctx, VLOG(3) << "TensorCopy " << src.dims() << " from " << src.place() << " to " << dst_place; dst->Resize(src.dims()); - dst->mutable_meta()->layout = src.meta().layout; + CHECK(dst->layout() == src.layout()); auto size = src.numel() * paddle::framework::SizeOfType( TransToProtoVarType(src.data_type())); diff --git a/paddle/pten/kernels/functions/eigen/dot.h b/paddle/pten/kernels/functions/eigen/dot.h index 605517bad6a9a..300da4ae1f13b 100644 --- a/paddle/pten/kernels/functions/eigen/dot.h +++ b/paddle/pten/kernels/functions/eigen/dot.h @@ -28,7 +28,6 @@ void Dot(const DevCtx& dev_ctx, const DenseTensor& x, const DenseTensor& y, DenseTensor* out) { - out->mutable_data(); if (1 == out->dims().size()) { auto eigen_out = pten::EigenScalar::From(*out); auto eigen_x = pten::EigenVector::Flatten(x); diff --git a/paddle/pten/kernels/functions/eigen/mean.h b/paddle/pten/kernels/functions/eigen/mean.h index 574a1957ae558..ee4bf1653f23a 100644 --- a/paddle/pten/kernels/functions/eigen/mean.h +++ b/paddle/pten/kernels/functions/eigen/mean.h @@ -25,8 +25,6 @@ namespace eigen { template void Mean(const DevCtx& dev_ctx, const DenseTensor& x, DenseTensor* out) { - out->mutable_data(); - // TODO(chenweihang): if we design new tensor, we should support // the low-level calc functor use new tensor as input, // which may be a big project! diff --git a/paddle/pten/tests/CMakeLists.txt b/paddle/pten/tests/CMakeLists.txt index d30ac2578d00b..21ce2f74df945 100644 --- a/paddle/pten/tests/CMakeLists.txt +++ b/paddle/pten/tests/CMakeLists.txt @@ -3,8 +3,8 @@ cc_test(pten_data_layout_test SRCS data_layout_test.cc DEPS gtest) cc_test(pten_data_type_test SRCS data_type_test.cc DEPS gtest) cc_test(dense_tensor_test SRCS dense_tensor_test.cc DEPS dense_tensor) cc_test(kernel_factory_test SRCS kernel_factory_test.cc DEPS kernel_factory) -cc_test(test_mean_api SRCS test_mean_api.cc DEPS math_api) -cc_test(test_dot_api SRCS test_dot_api.cc DEPS linalg_api) -cc_test(test_fill_api SRCS test_fill_api.cc DEPS creation_api) -cc_test(test_copy_api SRCS test_copy_api.cc DEPS utils_cpu) -cc_test(test_flatten_api SRCS test_flatten_api.cc DEPS utils_cpu manipulation_api) +cc_test(test_mean_api SRCS test_mean_api.cc DEPS math_api pten_hapi_utils) +cc_test(test_dot_api SRCS test_dot_api.cc DEPS linalg_api pten_hapi_utils) +cc_test(test_fill_api SRCS test_fill_api.cc DEPS creation_api pten_hapi_utils) +cc_test(test_copy_api SRCS test_copy_api.cc DEPS utils_cpu pten_hapi_utils) +cc_test(test_flatten_api SRCS test_flatten_api.cc DEPS utils_cpu manipulation_api pten_hapi_utils) diff --git a/paddle/pten/tests/dense_tensor_test.cc b/paddle/pten/tests/dense_tensor_test.cc index 722eab17ec412..e74917263dafb 100644 --- a/paddle/pten/tests/dense_tensor_test.cc +++ b/paddle/pten/tests/dense_tensor_test.cc @@ -18,16 +18,3 @@ limitations under the License. 
*/ namespace framework = paddle::framework; using DDim = paddle::framework::DDim; - -TEST(DenseTensor, Constructor) { - pten::DenseTensor tensor(pten::TensorMeta(framework::make_ddim({5, 10}), - pten::Backend::CPU, - pten::DataType::FLOAT32, - pten::DataLayout::NCHW, - 0UL), - pten::TensorStatus()); - ASSERT_EQ(tensor.dims().size(), 2); - ASSERT_EQ(tensor.backend(), pten::Backend::CPU); - ASSERT_EQ(tensor.data_type(), pten::DataType::FLOAT32); - ASSERT_EQ(tensor.layout(), pten::DataLayout::NCHW); -} diff --git a/paddle/pten/tests/test_copy_api.cc b/paddle/pten/tests/test_copy_api.cc index 39533c73a2564..fcebe9a310dea 100644 --- a/paddle/pten/tests/test_copy_api.cc +++ b/paddle/pten/tests/test_copy_api.cc @@ -19,6 +19,7 @@ limitations under the License. */ #include "paddle/pten/kernels/cpu/utils.h" #include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/hapi/lib/utils/allocator.h" PT_DECLARE_MODULE(UtilsCPU); @@ -30,20 +31,20 @@ using DDim = paddle::framework::DDim; // 'paddle/api', TEST(API, copy) { // 1. create tensor + const auto alloc = std::make_shared( + paddle::platform::CPUPlace()); auto dense_src = std::make_shared( - pten::TensorMeta(framework::make_ddim({2, 3}), - pten::Backend::CPU, - pten::DataType::FLOAT32, - pten::DataLayout::NCHW), - pten::TensorStatus()); + alloc, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({2, 3}), + pten::DataLayout::NCHW)); auto* dense_x_data = dense_src->mutable_data(); auto dense_dst = std::make_shared( - pten::TensorMeta(framework::make_ddim({2, 3}), - pten::Backend::CPU, - pten::DataType::FLOAT32, - pten::DataLayout::NCHW), - pten::TensorStatus()); + alloc, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({2, 3}), + pten::DataLayout::NCHW)); for (size_t i = 0; i < 2; ++i) { for (size_t j = 0; j < 3; ++j) { diff --git a/paddle/pten/tests/test_dot_api.cc b/paddle/pten/tests/test_dot_api.cc index affa18469ec21..69e785904fe3c 100644 --- a/paddle/pten/tests/test_dot_api.cc +++ b/paddle/pten/tests/test_dot_api.cc @@ -19,6 +19,7 @@ limitations under the License. */ #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/kernel_registry.h" +#include "paddle/pten/hapi/lib/utils/allocator.h" PT_DECLARE_MODULE(LinalgCPU); @@ -32,20 +33,20 @@ using DDim = paddle::framework::DDim; // TODO(chenweihang): Remove this test after the API is used in the dygraph TEST(API, dot) { // 1. create tensor + const auto alloc = std::make_shared( + paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - pten::TensorMeta(framework::make_ddim({3, 10}), - pten::Backend::CPU, - pten::DataType::FLOAT32, - pten::DataLayout::NCHW), - pten::TensorStatus()); + alloc, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({3, 10}), + pten::DataLayout::NCHW)); auto* dense_x_data = dense_x->mutable_data(); auto dense_y = std::make_shared( - pten::TensorMeta(framework::make_ddim({3, 10}), - pten::Backend::CPU, - pten::DataType::FLOAT32, - pten::DataLayout::NCHW), - pten::TensorStatus()); + alloc, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({3, 10}), + pten::DataLayout::NCHW)); auto* dense_y_data = dense_y->mutable_data(); float sum[3] = {0.0, 0.0, 0.0}; diff --git a/paddle/pten/tests/test_fill_api.cc b/paddle/pten/tests/test_fill_api.cc index afb36f95e8a1e..c19d14efaa976 100644 --- a/paddle/pten/tests/test_fill_api.cc +++ b/paddle/pten/tests/test_fill_api.cc @@ -19,6 +19,7 @@ limitations under the License. 
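The dot test hunk above ends before the API call itself; as a hedged sketch (not the elided test body), this is how such prepared inputs are typically consumed through the high-level API, mirroring the Tensor wrapping and result-checking pattern visible in the fill tests below:

    // Continuing from dense_x / dense_y as prepared in TEST(API, dot) above.
    paddle::experimental::Tensor x(dense_x);
    paddle::experimental::Tensor y(dense_y);

    auto out = paddle::experimental::dot(x, y);  // output meta is {3, 1}, cf. DotInferShape

    auto dense_out = std::dynamic_pointer_cast<pten::DenseTensor>(out.impl());
    auto* result = dense_out->data<float>();     // one reduced value per row, cf. sum[3] above
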
*/ #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/kernel_registry.h" +#include "paddle/pten/hapi/lib/utils/allocator.h" PT_DECLARE_MODULE(CreationCPU); @@ -32,12 +33,14 @@ using DDim = paddle::framework::DDim; // TODO(chenweihang): Remove this test after the API is used in the dygraph TEST(API, full_like) { // 1. create tensor + // 1. create tensor + const auto alloc = std::make_shared( + paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - pten::TensorMeta(framework::make_ddim({3, 2}), - pten::Backend::CPU, - pten::DataType::FLOAT32, - pten::DataLayout::NCHW), - pten::TensorStatus()); + alloc, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({3, 2}), + pten::DataLayout::NCHW)); auto* dense_x_data = dense_x->mutable_data(); dense_x_data[0] = 0; @@ -66,12 +69,13 @@ TEST(API, full_like) { TEST(API, zeros_like) { // 1. create tensor + const auto alloc = std::make_shared( + paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - pten::TensorMeta(framework::make_ddim({3, 2}), - pten::Backend::CPU, - pten::DataType::FLOAT32, - pten::DataLayout::NCHW), - pten::TensorStatus()); + alloc, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({3, 2}), + pten::DataLayout::NCHW)); auto* dense_x_data = dense_x->mutable_data(); dense_x_data[0] = 1; @@ -98,13 +102,14 @@ TEST(API, zeros_like) { TEST(API, ones_like) { // 1. create tensor + const auto alloc = std::make_shared( + paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - pten::TensorMeta(framework::make_ddim({3, 2}), - pten::Backend::CPU, - pten::DataType::FLOAT32, - pten::DataLayout::NCHW), - pten::TensorStatus()); - auto* dense_x_data = dense_x->mutable_data(); + alloc, + pten::DenseTensorMeta(pten::DataType::INT32, + framework::make_ddim({3, 2}), + pten::DataLayout::NCHW)); + auto* dense_x_data = dense_x->mutable_data(); dense_x_data[0] = 0; paddle::experimental::Tensor x(dense_x); @@ -122,7 +127,7 @@ TEST(API, ones_like) { ASSERT_EQ(out.initialized(), true); auto dense_out = std::dynamic_pointer_cast(out.impl()); - auto* actual_result = dense_out->data(); + auto* actual_result = dense_out->data(); for (auto i = 0; i < 6; i++) { ASSERT_EQ(actual_result[i], 1); } diff --git a/paddle/pten/tests/test_flatten_api.cc b/paddle/pten/tests/test_flatten_api.cc index 7f68cd75bc8d2..48d2205c2ff48 100644 --- a/paddle/pten/tests/test_flatten_api.cc +++ b/paddle/pten/tests/test_flatten_api.cc @@ -19,6 +19,7 @@ limitations under the License. */ #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/kernel_registry.h" +#include "paddle/pten/hapi/lib/utils/allocator.h" PT_DECLARE_MODULE(ManipulationCPU); @@ -32,12 +33,13 @@ using DDim = paddle::framework::DDim; // TODO(chenweihang): Remove this test after the API is used in the dygraph TEST(API, flatten) { // 1. 
create tensor + const auto alloc = std::make_shared( + paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - pten::TensorMeta(framework::make_ddim({3, 2, 2, 3}), - pten::Backend::CPU, - pten::DataType::FLOAT32, - pten::DataLayout::NCHW), - pten::TensorStatus()); + alloc, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({3, 2, 2, 3}), + pten::DataLayout::NCHW)); auto* dense_x_data = dense_x->mutable_data(); for (int i = 0; i < dense_x->numel(); i++) { diff --git a/paddle/pten/tests/test_mean_api.cc b/paddle/pten/tests/test_mean_api.cc index 9c0472916e01d..ee8388671b7eb 100644 --- a/paddle/pten/tests/test_mean_api.cc +++ b/paddle/pten/tests/test_mean_api.cc @@ -19,6 +19,7 @@ limitations under the License. */ #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/kernel_registry.h" +#include "paddle/pten/hapi/lib/utils/allocator.h" PT_DECLARE_MODULE(MathCPU); @@ -32,12 +33,13 @@ using DDim = paddle::framework::DDim; // TODO(chenweihang): Remove this test after the API is used in the dygraph TEST(API, mean) { // 1. create tensor + const auto alloc = std::make_shared( + paddle::platform::CPUPlace()); auto dense_x = std::make_shared( - pten::TensorMeta(framework::make_ddim({3, 4}), - pten::Backend::CPU, - pten::DataType::FLOAT32, - pten::DataLayout::NCHW), - pten::TensorStatus()); + alloc, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({3, 4}), + pten::DataLayout::NCHW)); auto* dense_x_data = dense_x->mutable_data(); float sum = 0.0;