From 9ba08b10c85a86cf2c1bae1d0447c0f066f00084 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Tue, 16 Nov 2021 11:28:38 +0000 Subject: [PATCH 01/17] rename TensorBase interface data_type() to dtype() --- paddle/pten/api/lib/tensor.cc | 4 ++-- paddle/pten/api/lib/utils/tensor_utils.cc | 2 +- paddle/pten/core/compat_utils.h | 2 +- paddle/pten/core/dense_tensor.cc | 16 +++++++--------- paddle/pten/core/dense_tensor.h | 2 +- paddle/pten/core/tensor_base.h | 2 +- paddle/pten/kernels/cpu/utils.cc | 4 ++-- paddle/pten/kernels/cuda/math.cu | 2 +- paddle/pten/kernels/cuda/utils.cu | 4 ++-- paddle/pten/kernels/xpu/utils.cc | 4 ++-- paddle/pten/tests/api/test_tensor_utils.cc | 11 +++++------ paddle/pten/tests/core/test_dense_tensor.cc | 2 +- 12 files changed, 26 insertions(+), 29 deletions(-) diff --git a/paddle/pten/api/lib/tensor.cc b/paddle/pten/api/lib/tensor.cc index 5d10c7209e2b8..bb3fba885862b 100644 --- a/paddle/pten/api/lib/tensor.cc +++ b/paddle/pten/api/lib/tensor.cc @@ -109,9 +109,9 @@ void Tensor::reshape(const std::vector &shape) { "and it will be implemented by calling the reshape kernel later.")); } -DataType Tensor::dtype() const { return impl_->data_type(); } +DataType Tensor::dtype() const { return impl_->dtype(); } -DataType Tensor::type() const { return impl_->data_type(); } +DataType Tensor::type() const { return impl_->dtype(); } DataLayout Tensor::layout() const { return impl_->layout(); } diff --git a/paddle/pten/api/lib/utils/tensor_utils.cc b/paddle/pten/api/lib/utils/tensor_utils.cc index 2c362a11b11d9..b02392e5763be 100644 --- a/paddle/pten/api/lib/utils/tensor_utils.cc +++ b/paddle/pten/api/lib/utils/tensor_utils.cc @@ -125,7 +125,7 @@ void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) { auto storage = src->release(); std::shared_ptr holder( new TensorStorage(std::move(storage))); - dst->ResetHolderWithType(holder, pten::TransToProtoVarType(src->data_type())); + dst->ResetHolderWithType(holder, pten::TransToProtoVarType(src->dtype())); } void MovesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst) { diff --git a/paddle/pten/core/compat_utils.h b/paddle/pten/core/compat_utils.h index dc65e04b3ae73..a0602f33e3de2 100644 --- a/paddle/pten/core/compat_utils.h +++ b/paddle/pten/core/compat_utils.h @@ -74,7 +74,7 @@ class CompatibleDenseTensorUtils { ret.meta_.dims[0] = end_idx - begin_idx; ret.meta_.offset = tensor->meta_.offset + begin_idx * (tensor->numel() / tensor->dims()[0]) * - paddle::experimental::SizeOf(tensor->data_type()); + paddle::experimental::SizeOf(tensor->dtype()); } return ret; } diff --git a/paddle/pten/core/dense_tensor.cc b/paddle/pten/core/dense_tensor.cc index fbc2a24312941..bb38c53ada04e 100644 --- a/paddle/pten/core/dense_tensor.cc +++ b/paddle/pten/core/dense_tensor.cc @@ -24,14 +24,12 @@ namespace pten { DenseTensor::DenseTensor(const std::shared_ptr& a, const DenseTensorMeta& meta) : meta_(meta), - storage_( - make_intrusive(a, SizeOf(data_type()) * numel())) {} + storage_(make_intrusive(a, SizeOf(dtype()) * numel())) {} DenseTensor::DenseTensor(const std::shared_ptr& a, DenseTensorMeta&& meta) : meta_(std::move(meta)), - storage_( - make_intrusive(a, SizeOf(data_type()) * numel())) {} + storage_(make_intrusive(a, SizeOf(dtype()) * numel())) {} DenseTensor::DenseTensor(intrusive_ptr storage, const DenseTensorMeta& meta) @@ -60,7 +58,7 @@ void* DenseTensor::mutable_data(size_t request_bytes) { storage_, paddle::platform::errors::PreconditionNotMet( "The storage must be valid when call the mutable data function.")); 
- size_t bytes = numel() * SizeOf(data_type()); + size_t bytes = numel() * SizeOf(dtype()); if (request_bytes) { PADDLE_ENFORCE_GE(request_bytes, bytes, @@ -87,19 +85,19 @@ T* DenseTensor::mutable_data() { paddle::experimental::CppTypeToDataType::Type(); } PADDLE_ENFORCE( - (data_type() == paddle::experimental::CppTypeToDataType::Type()), + (dtype() == paddle::experimental::CppTypeToDataType::Type()), paddle::platform::errors::InvalidArgument( "The type of data (%d) we are trying to retrieve does not match the " "type of data currently contained in the container (%d).", static_cast(paddle::experimental::CppTypeToDataType::Type()), - static_cast(data_type()))); + static_cast(dtype()))); return static_cast(mutable_data()); } template const T* DenseTensor::data() const { PADDLE_ENFORCE( - (data_type() == paddle::experimental::CppTypeToDataType::Type()), + (dtype() == paddle::experimental::CppTypeToDataType::Type()), paddle::platform::errors::InvalidArgument( "The type of data we are trying to retrieve does not match the " "type of data currently contained in the container.")); @@ -115,7 +113,7 @@ const void* DenseTensor::data() const { } void DenseTensor::check_memory_size() const { - size_t bytes = numel() * SizeOf(data_type()); + size_t bytes = numel() * SizeOf(dtype()); PADDLE_ENFORCE_GE(memory_size(), bytes, paddle::platform::errors::InvalidArgument( diff --git a/paddle/pten/core/dense_tensor.h b/paddle/pten/core/dense_tensor.h index b0a4195bc6cec..8ece80f529161 100644 --- a/paddle/pten/core/dense_tensor.h +++ b/paddle/pten/core/dense_tensor.h @@ -93,7 +93,7 @@ class DenseTensor : public TensorBase, /// \brief Returns the data type of the tensor. /// \return The data type of the tensor. - DataType data_type() const noexcept override { return meta_.type; } + DataType dtype() const noexcept override { return meta_.type; } /// \brief Returns the data layout of the tensor. /// \return The data layout of the tensor. diff --git a/paddle/pten/core/tensor_base.h b/paddle/pten/core/tensor_base.h index 79fd742aea10b..528a52cee8da4 100644 --- a/paddle/pten/core/tensor_base.h +++ b/paddle/pten/core/tensor_base.h @@ -43,7 +43,7 @@ class TensorBase { /// \brief Returns the data type of the tensor. /// \return The data type of the tensor. - virtual DataType data_type() const = 0; + virtual DataType dtype() const = 0; /// \brief Returns the data layout of the tensor. /// \return The data layout of the tensor. 
diff --git a/paddle/pten/kernels/cpu/utils.cc b/paddle/pten/kernels/cpu/utils.cc index 3e0bfccb1ec72..e089eabb0e533 100644 --- a/paddle/pten/kernels/cpu/utils.cc +++ b/paddle/pten/kernels/cpu/utils.cc @@ -38,8 +38,8 @@ void Copy(const CPUContext& dev_ctx, const DenseTensor& src, DenseTensor* dst) { VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr; CHECK(dst->layout() == src.layout()); - auto size = src.numel() * paddle::framework::SizeOfType( - TransToProtoVarType(src.data_type())); + auto size = src.numel() * + paddle::framework::SizeOfType(TransToProtoVarType(src.dtype())); if (paddle::platform::is_cpu_place(src_place) && paddle::platform::is_cpu_place(dst_place)) { diff --git a/paddle/pten/kernels/cuda/math.cu b/paddle/pten/kernels/cuda/math.cu index 73a743d58e6a9..6a64290d39837 100644 --- a/paddle/pten/kernels/cuda/math.cu +++ b/paddle/pten/kernels/cuda/math.cu @@ -81,7 +81,7 @@ void Mean(const CUDAContext& dev_ctx, const DenseTensor& x, DenseTensor* out) { dev_ctx.GetPlace()); pten::DenseTensor tmp( alloc, - DenseTensorMeta(x.data_type(), + DenseTensorMeta(x.dtype(), paddle::framework::make_ddim( {static_cast(temp_storage_bytes)}), x.layout())); diff --git a/paddle/pten/kernels/cuda/utils.cu b/paddle/pten/kernels/cuda/utils.cu index c3940b42ca46e..04cf1413cba68 100644 --- a/paddle/pten/kernels/cuda/utils.cu +++ b/paddle/pten/kernels/cuda/utils.cu @@ -48,8 +48,8 @@ void Copy(const CUDAContext& dev_ctx, VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr; CHECK(dst->layout() == src.layout()); - auto size = src.numel() * paddle::framework::SizeOfType( - TransToProtoVarType(src.data_type())); + auto size = src.numel() * + paddle::framework::SizeOfType(TransToProtoVarType(src.dtype())); if (paddle::platform::is_cuda_pinned_place(src_place) && // NOLINT paddle::platform::is_cuda_pinned_place(dst_place)) { diff --git a/paddle/pten/kernels/xpu/utils.cc b/paddle/pten/kernels/xpu/utils.cc index 33bdc66ff01f3..9bfe493f5ff9d 100644 --- a/paddle/pten/kernels/xpu/utils.cc +++ b/paddle/pten/kernels/xpu/utils.cc @@ -38,8 +38,8 @@ void Copy(const XPUDeviceContext& dev_ctx, << dst_place; dst->Resize(src.dims()); CHECK(dst->layout() == src.layout()); - auto size = src.numel() * paddle::framework::SizeOfType( - TransToProtoVarType(src.data_type())); + auto size = src.numel() * + paddle::framework::SizeOfType(TransToProtoVarType(src.dtype())); if (paddle::platform::is_xpu_place(src_place) && // NOLINT paddle::platform::is_cpu_place(dst_place)) { diff --git a/paddle/pten/tests/api/test_tensor_utils.cc b/paddle/pten/tests/api/test_tensor_utils.cc index fd52b96542c71..cebb0fc07ee63 100644 --- a/paddle/pten/tests/api/test_tensor_utils.cc +++ b/paddle/pten/tests/api/test_tensor_utils.cc @@ -47,8 +47,7 @@ TEST(tensor_utils, dense_tensor_to_lod_tensor) { CHECK(dense_tensor.lod().size() == lod_tensor.lod().size()); CHECK(dense_tensor.lod()[0] == static_cast>((lod_tensor.lod()[0]))); - CHECK(dense_tensor.data_type() == - pten::TransToPtenDataType(lod_tensor.type())); + CHECK(dense_tensor.dtype() == pten::TransToPtenDataType(lod_tensor.type())); CHECK(dense_tensor.layout() == pten::TransToPtenDataLayout(lod_tensor.layout())); CHECK(platform::is_cpu_place(lod_tensor.place())); @@ -58,7 +57,7 @@ TEST(tensor_utils, dense_tensor_to_lod_tensor) { auto dense_tensor_1 = MakePtenDenseTensor(lod_tensor); CHECK(dense_tensor_1->dims() == dims); - CHECK(dense_tensor_1->data_type() == dtype); + CHECK(dense_tensor_1->dtype() == dtype); CHECK(dense_tensor_1->layout() == layout); CHECK(dense_tensor_1->lod().size() == 
lod.size()); CHECK(dense_tensor_1->lod()[0] == lod[0]); @@ -83,7 +82,7 @@ TEST(tensor_utils, dense_tensor_to_tensor) { framework::Tensor tensor; MovesStorage(&dense_tensor, &tensor); - CHECK(dense_tensor.data_type() == pten::TransToPtenDataType(tensor.type())); + CHECK(dense_tensor.dtype() == pten::TransToPtenDataType(tensor.type())); CHECK(dense_tensor.layout() == pten::TransToPtenDataLayout(tensor.layout())); CHECK(platform::is_cpu_place(tensor.place())); @@ -92,7 +91,7 @@ TEST(tensor_utils, dense_tensor_to_tensor) { auto dense_tensor_1 = MakePtenDenseTensor(tensor); CHECK(dense_tensor_1->dims() == dims); - CHECK(dense_tensor_1->data_type() == dtype); + CHECK(dense_tensor_1->dtype() == dtype); CHECK(dense_tensor_1->layout() == layout); const float* data_1 = dense_tensor_1->data(); CHECK(data_1[0] == 1.0f); @@ -117,7 +116,7 @@ TEST(PtenUtils, VarToPtTensor) { // 2. test API auto tensor_x = MakePtenTensorBaseFromVar(v, tensor_def); // 3. check result - ASSERT_EQ(tensor_x->data_type(), pten::DataType::INT32); + ASSERT_EQ(tensor_x->dtype(), pten::DataType::INT32); } } // namespace tests diff --git a/paddle/pten/tests/core/test_dense_tensor.cc b/paddle/pten/tests/core/test_dense_tensor.cc index dac2575713bfb..69c5e9b12606b 100644 --- a/paddle/pten/tests/core/test_dense_tensor.cc +++ b/paddle/pten/tests/core/test_dense_tensor.cc @@ -82,7 +82,7 @@ TEST(dense_tensor, ctor) { bool r{true}; r = r && (t.numel() == product(m.dims)); r = r && (t.dims() == m.dims); - r = r && (t.data_type() == m.type); + r = r && (t.dtype() == m.type); r = r && (t.layout() == m.layout); r = r && (t.place() == paddle::platform::CPUPlace()); r = r && t.initialized(); From 3c1afc0598df2ca15dc6b40f0484a56c92ac97ba Mon Sep 17 00:00:00 2001 From: zyfncg Date: Wed, 17 Nov 2021 05:12:57 +0000 Subject: [PATCH 02/17] rename type to dtype of TensorMeta --- paddle/pten/api/lib/utils/tensor_utils.cc | 4 ++-- paddle/pten/core/dense_tensor.cc | 4 ++-- paddle/pten/core/dense_tensor.h | 2 +- paddle/pten/core/tensor_meta.h | 24 +++++++++---------- paddle/pten/infermeta/binary.cc | 6 ++--- paddle/pten/infermeta/unary.cc | 8 +++---- paddle/pten/tests/core/test_dense_tensor.cc | 12 +++++----- paddle/pten/tests/kernels/test_dot_dev_api.cc | 2 +- .../tests/kernels/test_elementwise_dev_api.cc | 2 +- .../pten/tests/kernels/test_fill_dev_api.cc | 2 +- .../tests/kernels/test_flatten_dev_api.cc | 2 +- .../pten/tests/kernels/test_mean_dev_api.cc | 2 +- .../tests/kernels/test_reshape_dev_api.cc | 2 +- .../pten/tests/kernels/test_scale_dev_api.cc | 4 ++-- 14 files changed, 38 insertions(+), 38 deletions(-) diff --git a/paddle/pten/api/lib/utils/tensor_utils.cc b/paddle/pten/api/lib/utils/tensor_utils.cc index b02392e5763be..4936006d26f8a 100644 --- a/paddle/pten/api/lib/utils/tensor_utils.cc +++ b/paddle/pten/api/lib/utils/tensor_utils.cc @@ -146,7 +146,7 @@ void ReMakePtenDenseTensor(const paddle::framework::Tensor& src, auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst); meta->dims = src.dims(); // Since the type of DenseTensorMeta is const, const_cast must be used - const_cast(meta->type) = pten::TransToPtenDataType(src.type()); + const_cast(meta->dtype) = pten::TransToPtenDataType(src.type()); // Since the type of DenseTensorMeta is const, const_cast must be used const_cast(meta->layout) = pten::TransToPtenDataLayout(src.layout()); @@ -164,7 +164,7 @@ void ReMakePtenDenseTensor(const paddle::framework::LoDTensor& src, auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst); meta->dims = src.dims(); // Since the type 
of DenseTensorMeta is const, const_cast must be used - const_cast(meta->type) = pten::TransToPtenDataType(src.type()); + const_cast(meta->dtype) = pten::TransToPtenDataType(src.type()); // Since the type of DenseTensorMeta is const, const_cast must be used const_cast(meta->layout) = pten::TransToPtenDataLayout(src.layout()); diff --git a/paddle/pten/core/dense_tensor.cc b/paddle/pten/core/dense_tensor.cc index bb38c53ada04e..701ccb509f15c 100644 --- a/paddle/pten/core/dense_tensor.cc +++ b/paddle/pten/core/dense_tensor.cc @@ -80,8 +80,8 @@ T* DenseTensor::mutable_data() { // In order to be compatible with the original Tensor design and // execution system, we have to reset the datatype in mutable_data. // When the compatibility phase is over in the future, we can delete it - if (meta_.type == DataType::UNDEFINED) { - const_cast(meta_.type) = + if (meta_.dtype == DataType::UNDEFINED) { + const_cast(meta_.dtype) = paddle::experimental::CppTypeToDataType::Type(); } PADDLE_ENFORCE( diff --git a/paddle/pten/core/dense_tensor.h b/paddle/pten/core/dense_tensor.h index 8ece80f529161..6d938d8ab5790 100644 --- a/paddle/pten/core/dense_tensor.h +++ b/paddle/pten/core/dense_tensor.h @@ -93,7 +93,7 @@ class DenseTensor : public TensorBase, /// \brief Returns the data type of the tensor. /// \return The data type of the tensor. - DataType dtype() const noexcept override { return meta_.type; } + DataType dtype() const noexcept override { return meta_.dtype; } /// \brief Returns the data layout of the tensor. /// \return The data layout of the tensor. diff --git a/paddle/pten/core/tensor_meta.h b/paddle/pten/core/tensor_meta.h index eae270171d88e..7cbc919dab985 100644 --- a/paddle/pten/core/tensor_meta.h +++ b/paddle/pten/core/tensor_meta.h @@ -39,9 +39,9 @@ struct DenseTensorMeta { using DataLayout = paddle::experimental::DataLayout; DenseTensorMeta() = default; - DenseTensorMeta(DataType type, const DDim& dims); - DenseTensorMeta(DataType type, const DDim& dims, DataLayout layout); - DenseTensorMeta(DataType type, + DenseTensorMeta(DataType dtype, const DDim& dims); + DenseTensorMeta(DataType dtype, const DDim& dims, DataLayout layout); + DenseTensorMeta(DataType dtype, const DDim& dims, DataLayout layout, const std::vector>& lod); @@ -54,30 +54,30 @@ struct DenseTensorMeta { /// marked with `const` are expected to remain unchanged. 
const bool is_scalar{false}; DDim dims; - const DataType type{DataType::UNDEFINED}; + const DataType dtype{DataType::UNDEFINED}; const DataLayout layout{DataLayout::NCHW}; LoD lod; size_t offset{0}; }; -inline DenseTensorMeta::DenseTensorMeta(DataType type, const DDim& dims) - : dims(dims), type(type) {} +inline DenseTensorMeta::DenseTensorMeta(DataType dtype, const DDim& dims) + : dims(dims), dtype(dtype) {} -inline DenseTensorMeta::DenseTensorMeta(DataType type, +inline DenseTensorMeta::DenseTensorMeta(DataType dtype, const DDim& dims, DataLayout layout) - : dims(dims), type(type), layout(layout) {} + : dims(dims), dtype(dtype), layout(layout) {} inline DenseTensorMeta::DenseTensorMeta( - DataType type, + DataType dtype, const DDim& dims, DataLayout layout, const std::vector>& lod) - : dims(dims), type(type), layout(layout), lod(lod) {} + : dims(dims), dtype(dtype), layout(layout), lod(lod) {} inline bool DenseTensorMeta::valid() const noexcept { bool valid{true}; - valid = valid && (type != DataType::UNDEFINED); + valid = valid && (dtype != DataType::UNDEFINED); valid = valid && (layout != DataLayout::UNDEFINED); valid = valid && (is_scalar || product(dims) >= 0); return valid; @@ -86,7 +86,7 @@ inline bool DenseTensorMeta::valid() const noexcept { inline bool operator==(const DenseTensorMeta& lhs, const DenseTensorMeta& rhs) { bool ret = true; return ret && (lhs.is_scalar == rhs.is_scalar) && (lhs.dims == rhs.dims) && - (lhs.type == rhs.type) && (lhs.layout == rhs.layout) && + (lhs.dtype == rhs.dtype) && (lhs.layout == rhs.layout) && (lhs.lod == rhs.lod) && (lhs.offset == rhs.offset); } diff --git a/paddle/pten/infermeta/binary.cc b/paddle/pten/infermeta/binary.cc index e124466a6d33a..838e450007fcd 100644 --- a/paddle/pten/infermeta/binary.cc +++ b/paddle/pten/infermeta/binary.cc @@ -56,7 +56,7 @@ DenseTensorMeta DotInferShape(const DenseTensorMeta& x_meta, y_dims.to_str())); x_dims[x_dims.size() - 1] = 1; - DenseTensorMeta return_meta(x_meta.type, x_dims, x_meta.layout); + DenseTensorMeta return_meta(x_meta.dtype, x_dims, x_meta.layout); return return_meta; } @@ -127,13 +127,13 @@ DenseTensorMeta MatmulInferShape(const DenseTensorMeta& x_meta, auto ddim_out = paddle::framework::make_ddim(new_dims); - return {x_meta.type, ddim_out, x_meta.layout}; + return {x_meta.dtype, ddim_out, x_meta.layout}; } DenseTensorMeta ElementwiseInferShape(const DenseTensorMeta& x_meta, const DenseTensorMeta& y_meta, int axis) { - DenseTensorMeta return_meta(x_meta.type, x_meta.dims, x_meta.layout); + DenseTensorMeta return_meta(x_meta.dtype, x_meta.dims, x_meta.layout); if (x_meta.dims != y_meta.dims) { auto x_dims = x_meta.dims; auto y_dims = y_meta.dims; diff --git a/paddle/pten/infermeta/unary.cc b/paddle/pten/infermeta/unary.cc index 5099984886cce..ea6e97db3460d 100644 --- a/paddle/pten/infermeta/unary.cc +++ b/paddle/pten/infermeta/unary.cc @@ -23,7 +23,7 @@ DenseTensorMeta UnchangedInferShape(const DenseTensorMeta& x_meta) { DenseTensorMeta ReductionInferShape(const DenseTensorMeta& x_meta) { const auto& out_dims = paddle::framework::make_ddim({1}); - DenseTensorMeta return_meta(x_meta.type, out_dims, x_meta.layout); + DenseTensorMeta return_meta(x_meta.dtype, out_dims, x_meta.layout); return return_meta; } @@ -63,7 +63,7 @@ DenseTensorMeta FlattenInferShape(const DenseTensorMeta& x_meta, out_shape.push_back(x_dims[i]); } const auto& out_dims = paddle::framework::make_ddim(out_shape); - DenseTensorMeta return_meta(x_meta.type, out_dims, x_meta.layout); + DenseTensorMeta return_meta(x_meta.dtype, 
out_dims, x_meta.layout); if (x_dims[0] == return_meta.dims[0]) { // Only pass LoD when the first dimension of output and Input(X) @@ -77,7 +77,7 @@ DenseTensorMeta FlattenInferShape(const DenseTensorMeta& x_meta, DenseTensorMeta FullLikeInferShape(const DenseTensorMeta& x_meta, DataType dtype, DataLayout layout) { - return {dtype == DataType::UNDEFINED ? x_meta.type : dtype, + return {dtype == DataType::UNDEFINED ? x_meta.dtype : dtype, x_meta.dims, layout == DataLayout::UNDEFINED ? x_meta.layout : layout}; } @@ -211,7 +211,7 @@ DenseTensorMeta InferShapeFromVecValue(const DenseTensorMeta& x_meta, "But received 'shape' is empty.")); auto x_dims = x_meta.dims; auto out_dims = ValidateShape(shape, x_dims); - DenseTensorMeta return_meta(x_meta.type, out_dims, x_meta.layout); + DenseTensorMeta return_meta(x_meta.dtype, out_dims, x_meta.layout); if (x_dims[0] == return_meta.dims[0]) { // Only pass LoD when the first dimension of output and Input(X) // are the same. diff --git a/paddle/pten/tests/core/test_dense_tensor.cc b/paddle/pten/tests/core/test_dense_tensor.cc index 69c5e9b12606b..fae4a5415a372 100644 --- a/paddle/pten/tests/core/test_dense_tensor.cc +++ b/paddle/pten/tests/core/test_dense_tensor.cc @@ -31,32 +31,32 @@ TEST(dense_tensor, meta) { CHECK(!meta_0.valid()); DenseTensorMeta meta_1(dtype, dims); - CHECK(meta_1.type == dtype); + CHECK(meta_1.dtype == dtype); CHECK(meta_1.dims == dims); CHECK(meta_1.valid()); DenseTensorMeta meta_2(dtype, dims, layout); - CHECK(meta_2.type == dtype); + CHECK(meta_2.dtype == dtype); CHECK(meta_2.dims == dims); CHECK(meta_2.layout == layout); CHECK(meta_2.valid()); DenseTensorMeta meta_3(dtype, dims, layout, lod); - CHECK(meta_3.type == dtype); + CHECK(meta_3.dtype == dtype); CHECK(meta_3.dims == dims); CHECK(meta_3.layout == layout); CHECK(meta_3.lod == lod); CHECK(meta_3.valid()); DenseTensorMeta meta_4(meta_3); - CHECK(meta_4.type == dtype); + CHECK(meta_4.dtype == dtype); CHECK(meta_4.dims == dims); CHECK(meta_4.layout == layout); CHECK(meta_4.lod == lod); CHECK(meta_4.valid()); DenseTensorMeta meta_5(std::move(meta_4)); - CHECK(meta_5.type == dtype); + CHECK(meta_5.dtype == dtype); CHECK(meta_5.dims == dims); CHECK(meta_5.layout == layout); CHECK(meta_5.lod == lod); @@ -82,7 +82,7 @@ TEST(dense_tensor, ctor) { bool r{true}; r = r && (t.numel() == product(m.dims)); r = r && (t.dims() == m.dims); - r = r && (t.dtype() == m.type); + r = r && (t.dtype() == m.dtype); r = r && (t.layout() == m.layout); r = r && (t.place() == paddle::platform::CPUPlace()); r = r && t.initialized(); diff --git a/paddle/pten/tests/kernels/test_dot_dev_api.cc b/paddle/pten/tests/kernels/test_dot_dev_api.cc index 2276d49590a70..5485ef2843c2c 100644 --- a/paddle/pten/tests/kernels/test_dot_dev_api.cc +++ b/paddle/pten/tests/kernels/test_dot_dev_api.cc @@ -62,7 +62,7 @@ TEST(DEV_API, dot) { // 3. check result ASSERT_EQ(out.dims().size(), 2); ASSERT_EQ(out.dims()[0], 3); - ASSERT_EQ(out.meta().type, pten::DataType::FLOAT32); + ASSERT_EQ(out.meta().dtype, pten::DataType::FLOAT32); ASSERT_EQ(out.meta().layout, pten::DataLayout::NCHW); auto expect_result = sum; diff --git a/paddle/pten/tests/kernels/test_elementwise_dev_api.cc b/paddle/pten/tests/kernels/test_elementwise_dev_api.cc index f6b93b731865c..f2525ae800acc 100644 --- a/paddle/pten/tests/kernels/test_elementwise_dev_api.cc +++ b/paddle/pten/tests/kernels/test_elementwise_dev_api.cc @@ -65,7 +65,7 @@ TEST(DEV_API, elementwise_add) { // 3. 
check result ASSERT_EQ(dense_out.dims().size(), 2); ASSERT_EQ(dense_out.dims()[0], 3); - ASSERT_EQ(dense_out.meta().type, pten::DataType::FLOAT32); + ASSERT_EQ(dense_out.meta().dtype, pten::DataType::FLOAT32); ASSERT_EQ(dense_out.meta().layout, pten::DataLayout::NCHW); auto expect_result = sum; diff --git a/paddle/pten/tests/kernels/test_fill_dev_api.cc b/paddle/pten/tests/kernels/test_fill_dev_api.cc index 6e6af22f6de89..aa66877881b66 100644 --- a/paddle/pten/tests/kernels/test_fill_dev_api.cc +++ b/paddle/pten/tests/kernels/test_fill_dev_api.cc @@ -50,7 +50,7 @@ TEST(DEV_API, fill_any_like) { ASSERT_EQ(out.dims().size(), 2); ASSERT_EQ(out.dims()[0], 3); ASSERT_EQ(out.numel(), 6); - ASSERT_EQ(out.meta().type, pten::DataType::FLOAT32); + ASSERT_EQ(out.meta().dtype, pten::DataType::FLOAT32); ASSERT_EQ(out.meta().layout, pten::DataLayout::NCHW); auto* actual_result = out.data(); diff --git a/paddle/pten/tests/kernels/test_flatten_dev_api.cc b/paddle/pten/tests/kernels/test_flatten_dev_api.cc index b027c75a37b31..a9be6108d24b6 100644 --- a/paddle/pten/tests/kernels/test_flatten_dev_api.cc +++ b/paddle/pten/tests/kernels/test_flatten_dev_api.cc @@ -56,7 +56,7 @@ TEST(DEV_API, flatten) { ASSERT_EQ(out.dims()[1], expect_shape[1]); ASSERT_EQ(out.dims()[2], expect_shape[2]); ASSERT_EQ(out.numel(), 36); - ASSERT_EQ(out.meta().type, pten::DataType::FLOAT32); + ASSERT_EQ(out.meta().dtype, pten::DataType::FLOAT32); ASSERT_EQ(out.meta().layout, pten::DataLayout::NCHW); bool value_equal = true; diff --git a/paddle/pten/tests/kernels/test_mean_dev_api.cc b/paddle/pten/tests/kernels/test_mean_dev_api.cc index 1ae59ff8034f5..b16d339e18af3 100644 --- a/paddle/pten/tests/kernels/test_mean_dev_api.cc +++ b/paddle/pten/tests/kernels/test_mean_dev_api.cc @@ -49,7 +49,7 @@ TEST(DEV_API, mean) { // 3. check result ASSERT_EQ(out.dims().size(), 1); ASSERT_EQ(out.numel(), 1); - ASSERT_EQ(out.meta().type, pten::DataType::FLOAT32); + ASSERT_EQ(out.meta().dtype, pten::DataType::FLOAT32); ASSERT_EQ(out.meta().layout, pten::DataLayout::NCHW); auto expect_result = sum / 12; diff --git a/paddle/pten/tests/kernels/test_reshape_dev_api.cc b/paddle/pten/tests/kernels/test_reshape_dev_api.cc index c06cc8a8a406b..b227d3b009e89 100644 --- a/paddle/pten/tests/kernels/test_reshape_dev_api.cc +++ b/paddle/pten/tests/kernels/test_reshape_dev_api.cc @@ -54,7 +54,7 @@ TEST(DEV_API, reshape) { ASSERT_EQ(out.dims()[0], expect_shape[0]); ASSERT_EQ(out.dims()[1], expect_shape[1]); ASSERT_EQ(out.numel(), 36); - ASSERT_EQ(out.meta().type, pten::DataType::FLOAT32); + ASSERT_EQ(out.meta().dtype, pten::DataType::FLOAT32); ASSERT_EQ(out.meta().layout, pten::DataLayout::NCHW); bool value_equal = true; diff --git a/paddle/pten/tests/kernels/test_scale_dev_api.cc b/paddle/pten/tests/kernels/test_scale_dev_api.cc index b057821e6cf81..b87692137251a 100644 --- a/paddle/pten/tests/kernels/test_scale_dev_api.cc +++ b/paddle/pten/tests/kernels/test_scale_dev_api.cc @@ -56,7 +56,7 @@ TEST(DEV_API, scale) { // 3. check result ASSERT_EQ(out.dims().size(), 2); ASSERT_EQ(out.numel(), 12); - ASSERT_EQ(out.meta().type, pten::DataType::FLOAT32); + ASSERT_EQ(out.meta().dtype, pten::DataType::FLOAT32); ASSERT_EQ(out.meta().layout, pten::DataLayout::NCHW); auto expect_result = 23; @@ -101,7 +101,7 @@ TEST(DEV_API, scale_host) { // 3. 
check result ASSERT_EQ(out.dims().size(), 2); ASSERT_EQ(out.numel(), 12); - ASSERT_EQ(out.meta().type, pten::DataType::FLOAT32); + ASSERT_EQ(out.meta().dtype, pten::DataType::FLOAT32); ASSERT_EQ(out.meta().layout, pten::DataLayout::NCHW); auto expect_result = 23; From 7bc3cbb5431a2b37ee52e91314df407894718457 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Wed, 17 Nov 2021 13:27:33 +0000 Subject: [PATCH 03/17] merge the code --- paddle/pten/core/tensor_meta.h | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/paddle/pten/core/tensor_meta.h b/paddle/pten/core/tensor_meta.h index b311589d792eb..cc02c57a48ba1 100644 --- a/paddle/pten/core/tensor_meta.h +++ b/paddle/pten/core/tensor_meta.h @@ -60,37 +60,4 @@ struct DenseTensorMeta { size_t offset{0}; }; -<<<<<<< HEAD -inline DenseTensorMeta::DenseTensorMeta(DataType dtype, const DDim& dims) - : dims(dims), dtype(dtype) {} - -inline DenseTensorMeta::DenseTensorMeta(DataType dtype, - const DDim& dims, - DataLayout layout) - : dims(dims), dtype(dtype), layout(layout) {} - -inline DenseTensorMeta::DenseTensorMeta( - DataType dtype, - const DDim& dims, - DataLayout layout, - const std::vector>& lod) - : dims(dims), dtype(dtype), layout(layout), lod(lod) {} - -inline bool DenseTensorMeta::valid() const noexcept { - bool valid{true}; - valid = valid && (dtype != DataType::UNDEFINED); - valid = valid && (layout != DataLayout::UNDEFINED); - valid = valid && (is_scalar || product(dims) >= 0); - return valid; -} - -inline bool operator==(const DenseTensorMeta& lhs, const DenseTensorMeta& rhs) { - bool ret = true; - return ret && (lhs.is_scalar == rhs.is_scalar) && (lhs.dims == rhs.dims) && - (lhs.dtype == rhs.dtype) && (lhs.layout == rhs.layout) && - (lhs.lod == rhs.lod) && (lhs.offset == rhs.offset); -} - -======= ->>>>>>> d08753df36986f5a5a7384f092c578c296e5150b } // namespace pten From 7b79b03e0196ad72420a54efddf8244c7dc4271a Mon Sep 17 00:00:00 2001 From: zyfncg Date: Wed, 17 Nov 2021 13:57:02 +0000 Subject: [PATCH 04/17] merge the code --- paddle/pten/core/tensor_meta.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/paddle/pten/core/tensor_meta.cc b/paddle/pten/core/tensor_meta.cc index ebdcd9b5f250b..6bc28908a2570 100644 --- a/paddle/pten/core/tensor_meta.cc +++ b/paddle/pten/core/tensor_meta.cc @@ -16,23 +16,23 @@ limitations under the License. 
*/ namespace pten { -DenseTensorMeta::DenseTensorMeta(DataType type, const DDim& dims) - : dims(dims), type(type) {} +DenseTensorMeta::DenseTensorMeta(DataType dtype, const DDim& dims) + : dims(dims), dtype(dtype) {} -DenseTensorMeta::DenseTensorMeta(DataType type, +DenseTensorMeta::DenseTensorMeta(DataType dtype, const DDim& dims, DataLayout layout) - : dims(dims), type(type), layout(layout) {} + : dims(dims), dtype(dtype), layout(layout) {} DenseTensorMeta::DenseTensorMeta(DataType type, const DDim& dims, DataLayout layout, const std::vector>& lod) - : dims(dims), type(type), layout(layout), lod(lod) {} + : dims(dims), dtype(dtype), layout(layout), lod(lod) {} bool DenseTensorMeta::valid() const noexcept { bool valid{true}; - valid = valid && (type != DataType::UNDEFINED); + valid = valid && (dtype != DataType::UNDEFINED); valid = valid && (layout != DataLayout::UNDEFINED); valid = valid && (is_scalar || product(dims) >= 0); return valid; @@ -41,7 +41,7 @@ bool DenseTensorMeta::valid() const noexcept { bool operator==(const DenseTensorMeta& lhs, const DenseTensorMeta& rhs) { bool ret = true; return ret && (lhs.is_scalar == rhs.is_scalar) && (lhs.dims == rhs.dims) && - (lhs.type == rhs.type) && (lhs.layout == rhs.layout) && + (lhs.dtype == rhs.dtype) && (lhs.layout == rhs.layout) && (lhs.lod == rhs.lod) && (lhs.offset == rhs.offset); } } // namespace pten From 471a1bf7de8a0874f5a2254859545c03758450cb Mon Sep 17 00:00:00 2001 From: zyfncg Date: Thu, 18 Nov 2021 02:48:40 +0000 Subject: [PATCH 05/17] fix the problem when merge conflict --- paddle/pten/core/tensor_meta.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/pten/core/tensor_meta.cc b/paddle/pten/core/tensor_meta.cc index 6bc28908a2570..3e06508be69d6 100644 --- a/paddle/pten/core/tensor_meta.cc +++ b/paddle/pten/core/tensor_meta.cc @@ -24,7 +24,7 @@ DenseTensorMeta::DenseTensorMeta(DataType dtype, DataLayout layout) : dims(dims), dtype(dtype), layout(layout) {} -DenseTensorMeta::DenseTensorMeta(DataType type, +DenseTensorMeta::DenseTensorMeta(DataType dtype, const DDim& dims, DataLayout layout, const std::vector>& lod) From 835e4156785eb799a84757f12c5687065cb24119 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Fri, 19 Nov 2021 07:22:00 +0000 Subject: [PATCH 06/17] fix bug of ci caused by type of tensor_meta --- paddle/pten/tests/kernels/test_elementwise_dev_api.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/pten/tests/kernels/test_elementwise_dev_api.cc b/paddle/pten/tests/kernels/test_elementwise_dev_api.cc index c6e0d33991544..8dafce1fba7d8 100644 --- a/paddle/pten/tests/kernels/test_elementwise_dev_api.cc +++ b/paddle/pten/tests/kernels/test_elementwise_dev_api.cc @@ -118,7 +118,7 @@ TEST(DEV_API, subtract) { // 3. 
check result ASSERT_EQ(dense_out.dims().size(), 2); ASSERT_EQ(dense_out.dims()[0], 3); - ASSERT_EQ(dense_out.meta().type, pten::DataType::FLOAT32); + ASSERT_EQ(dense_out.dtype(), pten::DataType::FLOAT32); ASSERT_EQ(dense_out.meta().layout, pten::DataLayout::NCHW); auto expect_result = sub; From 20ebc3f76e09358315f354d71a20bc5adeedfb46 Mon Sep 17 00:00:00 2001 From: sandyhouse Date: Thu, 24 Mar 2022 11:34:56 +0000 Subject: [PATCH 07/17] update --- .../distributed/collective/CMakeLists.txt | 6 +- paddle/fluid/distributed/collective/Common.cc | 54 +++++ paddle/fluid/distributed/collective/Common.h | 33 ++++ .../fluid/distributed/collective/HCCLTools.cc | 39 ++++ .../fluid/distributed/collective/HCCLTools.h | 4 + .../fluid/distributed/collective/NCCLTools.cc | 46 +++++ .../fluid/distributed/collective/NCCLTools.h | 5 + .../collective/ProcessGroupHCCL.cc | 51 +---- .../collective/ProcessGroupHeter.cc | 185 ++++++++++++++++++ .../collective/ProcessGroupHeter.h | 106 ++++++++++ .../collective/ProcessGroupNCCL.cc | 56 +----- 11 files changed, 479 insertions(+), 106 deletions(-) create mode 100644 paddle/fluid/distributed/collective/Common.cc create mode 100644 paddle/fluid/distributed/collective/Common.h create mode 100644 paddle/fluid/distributed/collective/HCCLTools.cc create mode 100644 paddle/fluid/distributed/collective/NCCLTools.cc create mode 100644 paddle/fluid/distributed/collective/ProcessGroupHeter.cc create mode 100644 paddle/fluid/distributed/collective/ProcessGroupHeter.h diff --git a/paddle/fluid/distributed/collective/CMakeLists.txt b/paddle/fluid/distributed/collective/CMakeLists.txt index 49ba9479d49e9..26c06f2ec20a3 100644 --- a/paddle/fluid/distributed/collective/CMakeLists.txt +++ b/paddle/fluid/distributed/collective/CMakeLists.txt @@ -6,8 +6,10 @@ if (WITH_DISTRIBUTE) endif() if(WITH_NCCL) - cc_library(processgroup_nccl SRCS ProcessGroupNCCL.cc DEPS place cuda_stream enforce collective_helper device_context phi phi_api eager_api) + cc_library(processgroup_nccl SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi phi_api eager_api) + cc_library(processgroup_heter SRCS ProcessGroupHeter.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi phi_api eager_api) endif() if(WITH_ASCEND_CL) - cc_library(processgroup_hccl SRCS ProcessGroupHCCL.cc DEPS place npu_stream enforce collective_helper device_context phi phi_api eager_api) + cc_library(processgroup_hccl SRCS ProcessGroupHCCL.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi phi_api eager_api) + cc_library(processgroup_heter SRCS ProcessGroupHeter.cc HCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi phi_api eager_api) endif() diff --git a/paddle/fluid/distributed/collective/Common.cc b/paddle/fluid/distributed/collective/Common.cc new file mode 100644 index 0000000000000..02eab58478ccc --- /dev/null +++ b/paddle/fluid/distributed/collective/Common.cc @@ -0,0 +1,54 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/distributed/collective/Common.h" + +namespace paddle { +namespace distributed { + +std::vector GetPlaceList(const std::vector& tensors) { + std::vector places; + places.reserve(tensors.size()); + for (auto& tensor : tensors) { + places.push_back(tensor.inner_place()); + } + return places; +} + +std::string GetKeyFromPlaces(const std::vector& places) { + std::string placeList; + for (auto& place : places) { + std::stringstream tmp; + tmp << place; + if (placeList.empty()) { + placeList += tmp.str(); + } else { + placeList += "," + tmp.str(); + } + } + return placeList; +} + +static bool CheckTensorsInPlace(const std::vector& tensors, + const PlaceType type) { + return std::all_of(tensors.cbegin(), tensors.cend(), + [&](const Tensor& t) { return t.place() == type; }); +} + +bool CheckTensorsInCudaPlace(const std::vector& tensors) { + return CheckTensorsInPlace(tensors, PlaceType::kGPU); +} + +} // namespace distributed +} // namespace paddle diff --git a/paddle/fluid/distributed/collective/Common.h b/paddle/fluid/distributed/collective/Common.h new file mode 100644 index 0000000000000..9569f4c61acef --- /dev/null +++ b/paddle/fluid/distributed/collective/Common.h @@ -0,0 +1,33 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/platform/place.h" +#include "paddle/phi/api/include/api.h" +namespace paddle { +namespace distributed { + +using Tensor = paddle::experimental::Tensor; + +using Place = paddle::platform::Place; +// Get the list of devices from list of tensors +std::vector GetPlaceList(const std::vector& tensors); +// Get the deviceList String from the list of devices +std::string GetKeyFromPlaces(const std::vector& places); + +bool CheckTensorsInCudaPlace(const std::vector& tensors); + +} // namespace distributed +} // namespace paddle diff --git a/paddle/fluid/distributed/collective/HCCLTools.cc b/paddle/fluid/distributed/collective/HCCLTools.cc new file mode 100644 index 0000000000000..c9f69447340db --- /dev/null +++ b/paddle/fluid/distributed/collective/HCCLTools.cc @@ -0,0 +1,39 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/distributed/collective/HCCLTools.h" + +HcclReduceOp ToHCCLRedType(ReduceOp reduction) { + static const std::map red_type = { + {ReduceOp::MIN, HCCL_REDUCE_MIN}, + {ReduceOp::MAX, HCCL_REDUCE_MAX}, + {ReduceOp::SUM, HCCL_REDUCE_SUM}, + {ReduceOp::PRODUCT, HCCL_REDUCE_PROD}, + }; + auto it = red_type.find(reduction); + PADDLE_ENFORCE_EQ( + it != red_type.end(), true, + platform::errors::InvalidArgument("Invalid hccl reduction. " + "Must be Min | Max | Prod | Sum")); + return it->second; +} + +std::string SerializeHCCLUniqueId(const HcclRootInfo& hcclID) { + const uint8_t* bytes = reinterpret_cast(&hcclID); + std::ostringstream oss; + for (size_t i = 0; i < sizeof(hcclID); ++i) { + oss << std::hex << static_cast(bytes[i]); + } + return oss.str(); +} diff --git a/paddle/fluid/distributed/collective/HCCLTools.h b/paddle/fluid/distributed/collective/HCCLTools.h index 09789bd4d3786..a1dcf7cd9b626 100644 --- a/paddle/fluid/distributed/collective/HCCLTools.h +++ b/paddle/fluid/distributed/collective/HCCLTools.h @@ -18,6 +18,7 @@ #include #include "boost/variant.hpp" +#include "paddle/fluid/distributed/collective/Types.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/variable.h" #include "paddle/fluid/platform/collective_helper.h" @@ -170,5 +171,8 @@ class HCCLCommManager { mutable std::mutex mutex_; }; +HcclReduceOp ToHCCLRedType(ReduceOp reduction); +std::string SerializeHCCLUniqueId(const HcclRootInfo& hcclID); + } // namespace distributed } // namespace paddle diff --git a/paddle/fluid/distributed/collective/NCCLTools.cc b/paddle/fluid/distributed/collective/NCCLTools.cc new file mode 100644 index 0000000000000..7e842ebf92166 --- /dev/null +++ b/paddle/fluid/distributed/collective/NCCLTools.cc @@ -0,0 +1,46 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/distributed/collective/NCCLTools.h" +#include "paddle/fluid/distributed/collective/Types.h" + +namespace paddle { +namespace distributed { + +ncclRedOp_t ToNCCLRedType(ReduceOp reduction) { + static const std::map red_type = { + {ReduceOp::MIN, ncclMin}, + {ReduceOp::MAX, ncclMax}, + {ReduceOp::SUM, ncclSum}, + {ReduceOp::PRODUCT, ncclProd}, + }; + auto it = red_type.find(reduction); + PADDLE_ENFORCE_EQ(it != red_type.end(), true, + platform::errors::InvalidArgument( + "Invalid nccl reduction. 
Must be ncclMin | ncclMax | " + "ncclProd | ncclSum")); + return it->second; +} + +std::string SerializeNCCLUniqueId(const ncclUniqueId& ncclID) { + const uint8_t* bytes = reinterpret_cast(&ncclID); + std::ostringstream oss; + for (auto i = 0; i < NCCL_UNIQUE_ID_BYTES; ++i) { + oss << std::hex << static_cast(bytes[i]); + } + return oss.str(); +} + +} // namespace distributed +} // namespace paddle diff --git a/paddle/fluid/distributed/collective/NCCLTools.h b/paddle/fluid/distributed/collective/NCCLTools.h index f30b96e72d453..0454518b1836c 100644 --- a/paddle/fluid/distributed/collective/NCCLTools.h +++ b/paddle/fluid/distributed/collective/NCCLTools.h @@ -26,6 +26,8 @@ #include "paddle/fluid/platform/dynload/nccl.h" #include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/distributed/collective/Types.h" + namespace paddle { namespace distributed { @@ -194,5 +196,8 @@ class NCCLCommManager { mutable std::mutex mutex_; }; +ncclRedOp_t ToNCCLRedType(ReduceOp reduction); +std::string SerializeNCCLUniqueId(const ncclUniqueId& ncclID); + } // namespace distributed } // namespace paddle diff --git a/paddle/fluid/distributed/collective/ProcessGroupHCCL.cc b/paddle/fluid/distributed/collective/ProcessGroupHCCL.cc index 2deeb7ca03003..402f45707d6b1 100644 --- a/paddle/fluid/distributed/collective/ProcessGroupHCCL.cc +++ b/paddle/fluid/distributed/collective/ProcessGroupHCCL.cc @@ -13,6 +13,8 @@ // limitations under the License. #include "paddle/fluid/distributed/collective/ProcessGroupHCCL.h" +#include "paddle/fluid/distributed/collective/Common.h" +#include "paddle/fluid/distributed/collective/HCCLTools.h" #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/platform/device/npu/hccl_helper.h" #include "paddle/fluid/platform/device_context.h" @@ -28,55 +30,6 @@ constexpr int64_t kWaitBlockTImeout = 10; namespace paddle { namespace distributed { -static HcclReduceOp ToHCCLRedType(ReduceOp reduction) { - static const std::map red_type = { - {ReduceOp::MIN, HCCL_REDUCE_MIN}, - {ReduceOp::MAX, HCCL_REDUCE_MAX}, - {ReduceOp::SUM, HCCL_REDUCE_SUM}, - {ReduceOp::PRODUCT, HCCL_REDUCE_PROD}, - }; - auto it = red_type.find(reduction); - PADDLE_ENFORCE_EQ( - it != red_type.end(), true, - platform::errors::InvalidArgument("Invalid hccl reduction. 
" - "Must be Min | Max | Prod | Sum")); - return it->second; -} - -std::string SerializeHCCLUniqueId(const HcclRootInfo& hcclID) { - const uint8_t* bytes = reinterpret_cast(&hcclID); - std::ostringstream oss; - for (size_t i = 0; i < sizeof(hcclID); ++i) { - oss << std::hex << static_cast(bytes[i]); - } - return oss.str(); -} - -// Get the list of devices from list of tensors -std::vector GetPlaceList(const std::vector& tensors) { - std::vector places; - places.reserve(tensors.size()); - for (auto& tensor : tensors) { - places.push_back(tensor.inner_place()); - } - return places; -} - -// Get the deviceList String from the list of devices -std::string GetKeyFromPlaces(const std::vector& places) { - std::string placeList; - for (auto& place : places) { - std::stringstream tmp; - tmp << place; - if (placeList.empty()) { - placeList += tmp.str(); - } else { - placeList += "," + tmp.str(); - } - } - return placeList; -} - // bool CheckTensorsInNPUPlace(const std::vector& tensors) { // return std::all_of(tensors.cbegin(), tensors.cend(), [&](const Tensor& t) { // return t.place() == platform::DeviceType::NPU; diff --git a/paddle/fluid/distributed/collective/ProcessGroupHeter.cc b/paddle/fluid/distributed/collective/ProcessGroupHeter.cc new file mode 100644 index 0000000000000..42ea0d46cd6da --- /dev/null +++ b/paddle/fluid/distributed/collective/ProcessGroupHeter.cc @@ -0,0 +1,185 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/distributed/collective/ProcessGroupHeter.h" +#include "paddle/fluid/platform/device/gpu/nccl_helper.h" +#include "paddle/fluid/platform/place.h" +#include "paddle/phi/api/include/api.h" +#include "paddle/phi/common/place.h" + +constexpr int64_t kWaitBlockTImeout = 10; + +namespace paddle { +namespace distributed { + +using Place = paddle::platform::Place; + +std::shared_ptr ProcessGroupHeter::CreateTask( + int rank, CommType comm_type, const std::vector& inputs) { + return std::make_shared(rank, comm_type, + inputs); +} + +ProcessGroupHeter::HeterTask::HeterTask(int rank, CommType CommType, + const std::vector& inputs) + : Task(rank, inputs, CommType) {} + +ProcessGroupHeter::HeterTask::~HeterTask() {} + +bool ProcessGroupHeter::HeterTask::IsCompleted() { return true; } + +// TODO(sheniang03): Add timeout for wait, now timeout unused +bool ProcessGroupHeter::HeterTask::Wait(std::chrono::milliseconds timeout) { + return true; +} + +ProcessGroupHeter::ProcessGroupHeter(const std::shared_ptr& store, + int rank, int size, int local_rank, + int local_size, bool with_switch, + int gloo_rank, int gloo_size) + : ProcessGroup(rank, size), + store_(store), + local_rank_(local_rank), + local_size_(local_size), + with_switch_(with_switch), + gloo_rank_(gloo_rank), + gloo_size_(gloo_size) { +#if defined(PADDLE_WITH_NCCL) + inner_pg_ = std::make_shared(store, local_rank, local_size); +#elif defined(PADDLE_WITH_ASCEND_CL) + inner_pg_ = std::make_shared(store, local_rank, local_size); +#else + PADDLE_THROW(platform::errors::InvalidArgument( + "ProcessGroupHeter only supports NCCL and HCCL now."); +#endif + if (with_switch_) { + // TODO(sandyhouse) starts a client to connect the cloud switch module + } else if (local_rank_ == 0) { + auto opts = ProcessGroupGloo::GlooOptions::create(); + opts->device = ProcessGroupGloo::createDefaultDevice(); + inter_pg_ = + std::make_shared(store, gloo_rank_, gloo_size_, opts); + } +} + +std::shared_ptr ProcessGroupHeter::AllReduce( + std::vector& tensors, const AllreduceOptions& opts) { +#if defined(PADDLE_WITH_NCCL) + PADDLE_ENFORCE_EQ( + CheckTensorsInCudaPlace(tensors), true, + platform::errors::InvalidArgument("All inputs should be in CudaPlace.")); +#elif defined(PADDLE_WITH_ASCEND_CL) + PADDLE_ENFORCE_EQ( + CheckTensorsInNPUPlace(tensors), true, + platform::errors::InvalidArgument("All inputs should be in NPUPlace.")); +#endif + + // Step1: do allreduce in inner cluster + auto task = inner_pg_->AllReduce(tensors, opts); + task->Wait(); + + // Step2: copy tensors to CPU + if (local_rank_ == 0) { + std::vector cpu_tensors(tensors.size()); + for (size_t i = 0; i < tensors.size(); i++) { + auto dense_gpu_tensor = + std::dynamic_pointer_cast(tensors[i].impl()); + auto dense_cpu_tensor = + std::dynamic_pointer_cast(cpu_tensors[i].impl()); + dense_cpu_tensor->Resize(tensors[i].dims()); + framework::TensorCopySync(*dense_gpu_tensor, platform::CPUPlace(), + dense_cpu_tensor.get()); + } + // Step3: do inter cluster allreduce + if (with_switch_) { + // TODO(sandyhouse) send to and recv from switch, and do add + } else { + auto gloo_task = inter_pg_->AllReduce(cpu_tensors, opts); + gloo_task->Wait(); + } + // Step4: copy cpu tensors to gpu + // TODO(sandyhouse) + // copy cpu tensors to gpu + for (size_t i = 0; i < tensors.size(); i++) { + auto dense_gpu_tensor = + std::dynamic_pointer_cast(tensors[i].impl()); + auto dense_cpu_tensor = + std::dynamic_pointer_cast(cpu_tensors[i].impl()); + // framework::TensorCopySync(*dense_cpu_tensor, 
tensors[i].place(), + // dense_gpu_tensor.get()); + framework::TensorCopySync(*dense_cpu_tensor, dense_cpu_tensor->place(), + dense_gpu_tensor.get()); + } + } + + // Step5: broadcast among inner cluster + auto b_opts = BroadcastOptions(); + b_opts.source_root = 0; + auto broadcast_task = inner_pg_->Broadcast(tensors, b_opts); + broadcast_task->Wait(); + return CreateTask(rank_, CommType::ALLREDUCE, tensors); +} + +std::shared_ptr ProcessGroupHeter::Broadcast( + std::vector& tensors, const BroadcastOptions& opts) { +#if defined(PADDLE_WITH_NCCL) + PADDLE_ENFORCE_EQ( + CheckTensorsInCudaPlace(tensors), true, + platform::errors::InvalidArgument("All inputs should be in CudaPlace.")); +#elif defined(PADDLE_WITH_ASCEND_CL) + PADDLE_ENFORCE_EQ( + CheckTensorsInNPUPlace(tensors), true, + platform::errors::InvalidArgument("All inputs should be in NPUPlace.")); +#endif + + // Step1: do broadcast in inner cluster + auto b_opts = BroadcastOptions(); + b_opts.source_root = 0; + inner_pg_->Broadcast(tensors, b_opts); + + if (local_rank_ == 0) { + std::vector cpu_tensors(tensors.size()); + for (size_t i = 0; i < tensors.size(); i++) { + auto dense_gpu_tensor = + std::dynamic_pointer_cast(tensors[i].impl()); + auto dense_cpu_tensor = + std::dynamic_pointer_cast(cpu_tensors[i].impl()); + dense_cpu_tensor->Resize(tensors[i].dims()); + framework::TensorCopySync(*dense_gpu_tensor, platform::CPUPlace(), + dense_cpu_tensor.get()); + } + if (with_switch_) { + // TODO(sandyhouse) send to and recv + } else { + auto gloo_task = inter_pg_->Broadcast(cpu_tensors, opts); + gloo_task->Wait(); + } + for (size_t i = 0; i < tensors.size(); i++) { + auto dense_gpu_tensor = + std::dynamic_pointer_cast(tensors[i].impl()); + auto dense_cpu_tensor = + std::dynamic_pointer_cast(cpu_tensors[i].impl()); + // framework::TensorCopySync(*dense_cpu_tensor, tensors[i].place(), + // dense_gpu_tensor.get()); + framework::TensorCopySync(*dense_cpu_tensor, dense_cpu_tensor->place(), + dense_gpu_tensor.get()); + } + } + auto broadcast_task = inner_pg_->Broadcast(tensors, b_opts); + broadcast_task->Wait(); + return CreateTask(rank_, CommType::BROADCAST, tensors); +} + +} // namespace distributed +} // namespace paddle diff --git a/paddle/fluid/distributed/collective/ProcessGroupHeter.h b/paddle/fluid/distributed/collective/ProcessGroupHeter.h new file mode 100644 index 0000000000000..95b6961e23f21 --- /dev/null +++ b/paddle/fluid/distributed/collective/ProcessGroupHeter.h @@ -0,0 +1,106 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "paddle/fluid/distributed/collective/ProcessGroup.h" +#include "paddle/fluid/distributed/collective/ProcessGroupGloo.h" +#include "paddle/fluid/platform/cuda_device_guard.h" +#include "paddle/fluid/platform/device_context.h" + +#include "paddle/fluid/distributed/store/store.h" +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/gen_comm_id_helper.h" +#include "paddle/fluid/platform/place.h" +#include "paddle/fluid/platform/stream/cuda_stream.h" + +#if defined(PADDLE_WITH_NCCL) +#include "paddle/fluid/distributed/collective/NCCLTools.h" +#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h" +#include "paddle/fluid/platform/dynload/nccl.h" +#endif + +#if defined(PADDLE_WITH_ASCEND_CL) +#include "paddle/fluid/distributed/collective/HCCLTools.h" +#include "paddle/fluid/distributed/collective/ProcessGroupHCCL.h" +#endif + +#include "paddle/fluid/distributed/collective/Common.h" + +constexpr const char* HETER_BACKEND_NAME = "HETER_BACKEND"; + +namespace paddle { +namespace distributed { + +using Place = paddle::platform::Place; + +class ProcessGroupHeter : public ProcessGroup { + public: + class HeterTask : public ProcessGroup::Task, + public std::enable_shared_from_this { + public: + HeterTask(int rank, CommType CommType, const std::vector& inputs); + + bool IsCompleted(); + + void SynchronizeStreams() {} + + bool Wait(std::chrono::milliseconds timeout = kWaitTimeout); + + void Synchronize() {} + + virtual ~HeterTask(); + }; + + ProcessGroupHeter(const std::shared_ptr& store, int rank, int size, + int local_rank, int local_size, bool with_switch, + int gloo_rank, int gloo_size); + + const std::string GetBackendName() const override { + return std::string(HETER_BACKEND_NAME); + } + + std::shared_ptr AllReduce( + std::vector& tensors, + const AllreduceOptions& = AllreduceOptions()) override; + + std::shared_ptr Broadcast( + std::vector& tensors, + const BroadcastOptions& = BroadcastOptions()) override; + + protected: + virtual std::shared_ptr CreateTask( + int rank, CommType opType, const std::vector& inputs); + + private: + std::shared_ptr store_; + std::shared_ptr inner_pg_; + std::shared_ptr inter_pg_; + + int local_rank_; + int local_size_; + bool with_switch_; + int gloo_rank_; + int gloo_size_; +}; + +} // namespace distributed +} // namespace paddle diff --git a/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc b/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc index 2af407f711ec1..0aa336d5830f3 100644 --- a/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc +++ b/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h" +#include "paddle/fluid/distributed/collective/Common.h" #include "paddle/fluid/platform/device/gpu/nccl_helper.h" #include "paddle/fluid/platform/place.h" #include "paddle/phi/api/include/api.h" @@ -26,61 +27,6 @@ constexpr int64_t kWaitBlockTImeout = 10; namespace paddle { namespace distributed { -static ncclRedOp_t ToNCCLRedType(ReduceOp reduction) { - static const std::map red_type = { - {ReduceOp::MIN, ncclMin}, - {ReduceOp::MAX, ncclMax}, - {ReduceOp::SUM, ncclSum}, - {ReduceOp::PRODUCT, ncclProd}, - }; - auto it = red_type.find(reduction); - PADDLE_ENFORCE_EQ(it != red_type.end(), true, - platform::errors::InvalidArgument( - "Invalid nccl reduction. 
Must be ncclMin | ncclMax | " - "ncclProd | ncclSum")); - return it->second; -} - -std::string SerializeNCCLUniqueId(const ncclUniqueId& ncclID) { - const uint8_t* bytes = reinterpret_cast(&ncclID); - std::ostringstream oss; - for (auto i = 0; i < NCCL_UNIQUE_ID_BYTES; ++i) { - oss << std::hex << static_cast(bytes[i]); - } - return oss.str(); -} - -// Get the list of devices from list of tensors -std::vector GetPlaceList(const std::vector& tensors) { - std::vector places; - places.reserve(tensors.size()); - for (auto& tensor : tensors) { - places.push_back(tensor.inner_place()); - } - return places; -} - -// Get the deviceList String from the list of devices -std::string GetKeyFromPlaces(const std::vector& places) { - std::string placeList; - for (auto& place : places) { - std::stringstream tmp; - tmp << place; - if (placeList.empty()) { - placeList += tmp.str(); - } else { - placeList += "," + tmp.str(); - } - } - return placeList; -} - -bool CheckTensorsInCudaPlace(const std::vector& tensors) { - return std::all_of(tensors.cbegin(), tensors.cend(), [&](const Tensor& t) { - return t.place() == PlaceType::kGPU; - }); -} - void SyncDefaultStream( const std::vector& places, std::vector& ncclEvents, // NOLINT From 829adae97160a2ed0846357fd23a834a04805d18 Mon Sep 17 00:00:00 2001 From: sandyhouse Date: Mon, 28 Mar 2022 01:49:08 +0000 Subject: [PATCH 08/17] update --- .../distributed/collective/ProcessGroup.cc | 8 +++- .../distributed/collective/ProcessGroup.h | 44 +++++++++++++++---- .../collective/ProcessGroupGloo.cc | 6 ++- .../distributed/collective/ProcessGroupGloo.h | 2 +- .../collective/ProcessGroupHCCL.cc | 4 +- .../distributed/collective/ProcessGroupHCCL.h | 3 +- .../collective/ProcessGroupHeter.cc | 26 ++++++----- .../collective/ProcessGroupHeter.h | 8 ++-- .../collective/ProcessGroupNCCL.cc | 4 +- .../distributed/collective/ProcessGroupNCCL.h | 3 +- paddle/fluid/pybind/distributed_py.cc | 14 +++--- 11 files changed, 84 insertions(+), 38 deletions(-) diff --git a/paddle/fluid/distributed/collective/ProcessGroup.cc b/paddle/fluid/distributed/collective/ProcessGroup.cc index 42ca3bd5f5be4..ab118dadd5d88 100644 --- a/paddle/fluid/distributed/collective/ProcessGroup.cc +++ b/paddle/fluid/distributed/collective/ProcessGroup.cc @@ -34,7 +34,13 @@ bool ProcessGroup::Task::Wait(std::chrono::milliseconds timeout) { void ProcessGroup::Task::Synchronize() {} -ProcessGroup::ProcessGroup(int rank, int size) : rank_(rank), size_(size) {} +ProcessGroup::ProcessGroup(int rank, int size, int gid) + : rank_(rank), size_(size) { + if (gid != IGNORE_ID) { + auto map = ProcessGroupMapFromGid::getInstance(); + map->insert(gid, this); + } +} } // namespace distributed } // namespace paddle diff --git a/paddle/fluid/distributed/collective/ProcessGroup.h b/paddle/fluid/distributed/collective/ProcessGroup.h index e43d0e8c183c7..d295d65f9b911 100644 --- a/paddle/fluid/distributed/collective/ProcessGroup.h +++ b/paddle/fluid/distributed/collective/ProcessGroup.h @@ -31,6 +31,7 @@ constexpr auto kWaitTimeout = std::chrono::milliseconds(0); namespace paddle { namespace distributed { +constexpr int IGNORE_ID = -1; using Tensor = paddle::experimental::Tensor; enum class CommType : std::uint8_t { @@ -49,14 +50,6 @@ enum class CommType : std::uint8_t { UNKNOWN = 100, }; -struct ProcessGroupStrategy { - int nranks_{1}; - int local_rank_{0}; - std::vector trainer_endpoints_{}; - std::string current_endpoint_{""}; - int nrings_{1}; -}; - class ProcessGroup { public: class Task { @@ -76,7 +69,7 @@ class ProcessGroup { 
    bool is_completed_ = false;
  };
-  explicit ProcessGroup(int rank, int size);
+  explicit ProcessGroup(int rank, int size, int gid);
   virtual ~ProcessGroup() {}
   int GetRank() const { return rank_; }
@@ -151,5 +144,38 @@ class ProcessGroup {
   const int size_;
 };
+class ProcessGroupMapFromGid {
+ public:
+  bool has(int gid) {
+    auto it = map_.find(gid);
+    return it != map_.end();
+  }
+
+  void insert(int gid, ProcessGroup* pg) {
+    PADDLE_ENFORCE_EQ(has(gid), false,
+                      platform::errors::PreconditionNotMet(
+                          "The process group with id %d doesnot exist.", gid));
+    map_[gid] = pg;
+  }
+
+  ProcessGroup* get(int gid) {
+    PADDLE_ENFORCE_EQ(has(gid), false,
+                      platform::errors::PreconditionNotMet(
+                          "The process group with id %d doesnot exist.", gid));
+    return map_.find(gid)->second;
+  }
+
+  static std::shared_ptr getInstance() {
+    static auto s_instance = std::make_shared();
+    return s_instance;
+  }
+
+  ProcessGroupMapFromGid() = default;
+  ~ProcessGroupMapFromGid() = default;
+
+ private:
+  std::unordered_map map_;
+};
+
 } // namespace distributed
 } // namespace paddle
diff --git a/paddle/fluid/distributed/collective/ProcessGroupGloo.cc b/paddle/fluid/distributed/collective/ProcessGroupGloo.cc
index cb82677a281e9..91c3bf93849e0 100644
--- a/paddle/fluid/distributed/collective/ProcessGroupGloo.cc
+++ b/paddle/fluid/distributed/collective/ProcessGroupGloo.cc
@@ -173,8 +173,10 @@ ProcessGroupGloo::GlooTask::GlooTask(int rank,
 ProcessGroupGloo::ProcessGroupGloo(
     const std::shared_ptr& store, int rank,
-    int world_size, const std::shared_ptr options)
-    : ProcessGroup(rank, world_size), _tag(0), _store(new GlooStore(store)) {
+    int world_size, int gid, const std::shared_ptr options)
+    : ProcessGroup(rank, world_size, gid),
+      _tag(0),
+      _store(new GlooStore(store)) {
   _context = std::make_shared(rank, world_size);
   auto prefix_store =
       ::gloo::rendezvous::PrefixStore(std::to_string(0), *_store);
diff --git a/paddle/fluid/distributed/collective/ProcessGroupGloo.h b/paddle/fluid/distributed/collective/ProcessGroupGloo.h
index 71e0a40f8a761..f0bf872cfc9e4 100644
--- a/paddle/fluid/distributed/collective/ProcessGroupGloo.h
+++ b/paddle/fluid/distributed/collective/ProcessGroupGloo.h
@@ -101,7 +101,7 @@ class ProcessGroupGloo : public ProcessGroup {
   explicit ProcessGroupGloo(
       const std::shared_ptr& store, int rank,
-      int world_size, std::shared_ptr options);
+      int world_size, int gid, std::shared_ptr options);
   ~ProcessGroupGloo() = default;
diff --git a/paddle/fluid/distributed/collective/ProcessGroupHCCL.cc b/paddle/fluid/distributed/collective/ProcessGroupHCCL.cc
index 402f45707d6b1..b21155e09d06e 100644
--- a/paddle/fluid/distributed/collective/ProcessGroupHCCL.cc
+++ b/paddle/fluid/distributed/collective/ProcessGroupHCCL.cc
@@ -103,8 +103,8 @@ bool ProcessGroupHCCL::HCCLTask::Wait(std::chrono::milliseconds timeout) {
 void ProcessGroupHCCL::HCCLTask::Synchronize() { Wait(kWaitTimeout); }
 ProcessGroupHCCL::ProcessGroupHCCL(const std::shared_ptr& store,
-                                   int rank, int size)
-    : ProcessGroup(rank, size), store_(store) {}
+                                   int rank, int size, int gid)
+    : ProcessGroup(rank, size, gid), store_(store) {}
 void ProcessGroupHCCL::BroadcastUniqueHCCLID(
     std::vector& hccl_ids) {  // NOLINT
diff --git a/paddle/fluid/distributed/collective/ProcessGroupHCCL.h b/paddle/fluid/distributed/collective/ProcessGroupHCCL.h
index 83d509be2cdd7..932ae75fc6b9d 100644
--- a/paddle/fluid/distributed/collective/ProcessGroupHCCL.h
+++ b/paddle/fluid/distributed/collective/ProcessGroupHCCL.h
@@ -70,7 +70,8 @@ class ProcessGroupHCCL : public 
ProcessGroup { private: }; - ProcessGroupHCCL(const std::shared_ptr& store, int rank, int size); + ProcessGroupHCCL(const std::shared_ptr& store, int rank, int size, + int gid); const std::string GetBackendName() const override { return std::string(HCCL_BACKEND_NAME); diff --git a/paddle/fluid/distributed/collective/ProcessGroupHeter.cc b/paddle/fluid/distributed/collective/ProcessGroupHeter.cc index 42ea0d46cd6da..3854efc1e6aee 100644 --- a/paddle/fluid/distributed/collective/ProcessGroupHeter.cc +++ b/paddle/fluid/distributed/collective/ProcessGroupHeter.cc @@ -45,31 +45,37 @@ bool ProcessGroupHeter::HeterTask::Wait(std::chrono::milliseconds timeout) { } ProcessGroupHeter::ProcessGroupHeter(const std::shared_ptr& store, - int rank, int size, int local_rank, - int local_size, bool with_switch, - int gloo_rank, int gloo_size) - : ProcessGroup(rank, size), + int rank, int size, int gid, + int local_rank, int local_size, + int gloo_rank, int gloo_size, + bool with_switch, + std::string switch_endpoint) + : ProcessGroup(rank, size, gid), store_(store), local_rank_(local_rank), local_size_(local_size), - with_switch_(with_switch), gloo_rank_(gloo_rank), - gloo_size_(gloo_size) { + gloo_size_(gloo_size), + with_switch_(with_switch) { #if defined(PADDLE_WITH_NCCL) - inner_pg_ = std::make_shared(store, local_rank, local_size); + inner_pg_ = std::make_shared(store, local_rank, local_size, + IGNORE_ID); #elif defined(PADDLE_WITH_ASCEND_CL) - inner_pg_ = std::make_shared(store, local_rank, local_size); + inner_pg_ = std::make_shared(store, local_rank, local_size, + IGNORE_ID); #else PADDLE_THROW(platform::errors::InvalidArgument( "ProcessGroupHeter only supports NCCL and HCCL now."); #endif if (with_switch_) { // TODO(sandyhouse) starts a client to connect the cloud switch module + // std::shared_ptr client_ = + // HeterClient::GetInstance({switch_endpoint}, {}, 0); } else if (local_rank_ == 0) { auto opts = ProcessGroupGloo::GlooOptions::create(); opts->device = ProcessGroupGloo::createDefaultDevice(); - inter_pg_ = - std::make_shared(store, gloo_rank_, gloo_size_, opts); + inter_pg_ = std::make_shared(store, gloo_rank_, + gloo_size_, IGNORE_ID, opts); } } diff --git a/paddle/fluid/distributed/collective/ProcessGroupHeter.h b/paddle/fluid/distributed/collective/ProcessGroupHeter.h index 95b6961e23f21..7a7ebaf604b57 100644 --- a/paddle/fluid/distributed/collective/ProcessGroupHeter.h +++ b/paddle/fluid/distributed/collective/ProcessGroupHeter.h @@ -23,6 +23,7 @@ #include "paddle/fluid/distributed/collective/ProcessGroup.h" #include "paddle/fluid/distributed/collective/ProcessGroupGloo.h" +#include "paddle/fluid/distributed/ps/service/heter_client.h" #include "paddle/fluid/platform/cuda_device_guard.h" #include "paddle/fluid/platform/device_context.h" @@ -71,8 +72,9 @@ class ProcessGroupHeter : public ProcessGroup { }; ProcessGroupHeter(const std::shared_ptr& store, int rank, int size, - int local_rank, int local_size, bool with_switch, - int gloo_rank, int gloo_size); + int gid, int local_rank, int local_size, int gloo_rank, + int gloo_size, bool with_switch, + std::string switch_endpoints); const std::string GetBackendName() const override { return std::string(HETER_BACKEND_NAME); @@ -97,9 +99,9 @@ class ProcessGroupHeter : public ProcessGroup { int local_rank_; int local_size_; - bool with_switch_; int gloo_rank_; int gloo_size_; + bool with_switch_; }; } // namespace distributed diff --git a/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc 
b/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc index 0aa336d5830f3..7c0752b5f367c 100644 --- a/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc +++ b/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc @@ -103,8 +103,8 @@ bool ProcessGroupNCCL::NCCLTask::Wait(std::chrono::milliseconds timeout) { void ProcessGroupNCCL::NCCLTask::Synchronize() { Wait(kWaitTimeout); } ProcessGroupNCCL::ProcessGroupNCCL(const std::shared_ptr& store, - int rank, int size) - : ProcessGroup(rank, size), store_(store) {} + int rank, int size, int gid) + : ProcessGroup(rank, size, gid), store_(store) {} void ProcessGroupNCCL::BroadcastUniqueNCCLID( std::vector& nccl_ids) { // NOLINT diff --git a/paddle/fluid/distributed/collective/ProcessGroupNCCL.h b/paddle/fluid/distributed/collective/ProcessGroupNCCL.h index aa2a2b8fa2088..4ab5374dacaf4 100644 --- a/paddle/fluid/distributed/collective/ProcessGroupNCCL.h +++ b/paddle/fluid/distributed/collective/ProcessGroupNCCL.h @@ -76,7 +76,8 @@ class ProcessGroupNCCL : public ProcessGroup { private: }; - ProcessGroupNCCL(const std::shared_ptr& store, int rank, int size); + ProcessGroupNCCL(const std::shared_ptr& store, int rank, int size, + int gid); const std::string GetBackendName() const override { return std::string(NCCL_BACKEND_NAME); diff --git a/paddle/fluid/pybind/distributed_py.cc b/paddle/fluid/pybind/distributed_py.cc index e89d8d96342e7..9f71001793da3 100644 --- a/paddle/fluid/pybind/distributed_py.cc +++ b/paddle/fluid/pybind/distributed_py.cc @@ -213,7 +213,8 @@ void BindDistributed(py::module *m) { py::class_>( *m, "ProcessGroupNCCL", ProcessGroup) - .def(py::init &, int, int>(), + .def(py::init &, int, int, + int>(), py::call_guard()); #endif @@ -221,7 +222,8 @@ void BindDistributed(py::module *m) { py::class_>( *m, "ProcessGroupHCCL", ProcessGroup) - .def(py::init &, int, int>(), + .def(py::init &, int, int, + int>(), py::call_guard()); #endif @@ -238,10 +240,10 @@ void BindDistributed(py::module *m) { py::class_>( *m, "ProcessGroupGloo", ProcessGroup) .def(py::init &, int, - int, std::shared_ptr &>(), + int, int, std::shared_ptr &>(), py::call_guard()) .def(py::init([](const std::shared_ptr &store, - int rank, int world_size) { + int rank, int world_size, int gid) { auto opts = GlooOptions::create(); char *ifname = getenv(GLOO_SOCKET_IFNAME_ENV.c_str()); if (ifname && strlen(ifname) > 1) { @@ -251,10 +253,10 @@ void BindDistributed(py::module *m) { opts->device = ProcessGroupGloo::createDefaultDevice(); } return std::make_shared(store, rank, world_size, - opts); + gid, opts); }), py::arg("store"), py::arg("rank"), py::arg("world_size"), - py::call_guard()) + py::arg("group_id"), py::call_guard()) .def_static("create_default_device", &ProcessGroupGloo::createDefaultDevice); #endif From 67d84d9a5d610a548ba017fbfeac6435b5664687 Mon Sep 17 00:00:00 2001 From: sandyhouse Date: Mon, 28 Mar 2022 06:59:51 +0000 Subject: [PATCH 09/17] update --- .../distributed/collective/ProcessGroup.h | 6 +++++ .../collective/ProcessGroupHeter.cc | 27 +++++++++++++++++++ .../collective/ProcessGroupHeter.h | 2 ++ .../operators/collective/c_broadcast_op.cu.cc | 9 +++++++ 4 files changed, 44 insertions(+) diff --git a/paddle/fluid/distributed/collective/ProcessGroup.h b/paddle/fluid/distributed/collective/ProcessGroup.h index d295d65f9b911..4e9eb7b664c41 100644 --- a/paddle/fluid/distributed/collective/ProcessGroup.h +++ b/paddle/fluid/distributed/collective/ProcessGroup.h @@ -92,6 +92,12 @@ class ProcessGroup { "ProcessGroup%s does not support broadcast", 
GetBackendName()));
   }
+
+  virtual void Broadcast(const phi::DenseTensor* in, phi::DenseTensor* out) {
+    PADDLE_THROW(platform::errors::InvalidArgument(
+        "ProcessGroup%s does not support broadcast for static",
+        GetBackendName()));
+  }
+
   virtual std::shared_ptr Barrier(
       const BarrierOptions& = BarrierOptions()) {
     PADDLE_THROW(platform::errors::InvalidArgument(
diff --git a/paddle/fluid/distributed/collective/ProcessGroupHeter.cc b/paddle/fluid/distributed/collective/ProcessGroupHeter.cc
index 3854efc1e6aee..e61c4c2f6410f 100644
--- a/paddle/fluid/distributed/collective/ProcessGroupHeter.cc
+++ b/paddle/fluid/distributed/collective/ProcessGroupHeter.cc
@@ -187,5 +187,32 @@ std::shared_ptr ProcessGroupHeter::Broadcast(
   return CreateTask(rank_, CommType::BROADCAST, tensors);
 }
+void ProcessGroupHeter::Broadcast(const phi::DenseTensor* in,
+                                  phi::DenseTensor* out) {
+  // Step1: do broadcast in inner cluster
+  auto b_opts = BroadcastOptions();
+  b_opts.source_root = 0;
+  inner_pg_->Broadcast(in, out);
+
+  if (local_rank_ == 0) {
+    Tensor cpu_tensor;
+    auto dense_cpu_tensor =
+        std::dynamic_pointer_cast(cpu_tensor.impl());
+    dense_cpu_tensor->Resize(in->dims());
+    framework::TensorCopySync(*in, platform::CPUPlace(),
+                              dense_cpu_tensor.get());
+    if (with_switch_) {
+      // TODO(sandyhouse) send to and recv
+    } else {
+      auto gloo_task = inter_pg_->Broadcast(cpu_tensors, opts);
+      gloo_task->Wait();
+    }
+    framework::TensorCopySync(*dense_cpu_tensor, dense_cpu_tensor->place(),
+                              out);
+  }
+  auto broadcast_task = inner_pg_->Broadcast(out, out);
+  broadcast_task->Wait();
+}
+
 } // namespace distributed
 } // namespace paddle
diff --git a/paddle/fluid/distributed/collective/ProcessGroupHeter.h b/paddle/fluid/distributed/collective/ProcessGroupHeter.h
index 7a7ebaf604b57..e8290a0c87917 100644
--- a/paddle/fluid/distributed/collective/ProcessGroupHeter.h
+++ b/paddle/fluid/distributed/collective/ProcessGroupHeter.h
@@ -88,6 +88,8 @@ class ProcessGroupHeter : public ProcessGroup {
       std::vector& tensors,
       const BroadcastOptions& = BroadcastOptions()) override;
+  void Broadcast(const phi::DenseTensor* in, phi::DenseTensor* out) override;
+
  protected:
   virtual std::shared_ptr CreateTask(
       int rank, CommType opType, const std::vector& inputs);
diff --git a/paddle/fluid/operators/collective/c_broadcast_op.cu.cc b/paddle/fluid/operators/collective/c_broadcast_op.cu.cc
index b16f256ee6cf3..0ad61bb16b51e 100644
--- a/paddle/fluid/operators/collective/c_broadcast_op.cu.cc
+++ b/paddle/fluid/operators/collective/c_broadcast_op.cu.cc
@@ -18,6 +18,8 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/collective_helper.h" #include "paddle/fluid/platform/device/gpu/nccl_helper.h" #endif +#include "paddle/fluid/distributed/collective/ProcessGroup.h" +#include "paddle/phi/api/include/tensor.h" namespace paddle { namespace operators { @@ -36,6 +38,13 @@ class CBroadcastOpCUDAKernel : public framework::OpKernel { int rid = ctx.Attr("ring_id"); auto place = ctx.GetPlace(); auto comm = platform::NCCLCommContext::Instance().Get(rid, place); + auto map = distributed::ProcessGroupMapFromGid::getInstance(); + if (map->has(rid)) { + // Use ProcessGroup + distributed::ProcessGroup* pg = map->get(rid); + pg->Broadcast(x, out); + return; + } gpuStream_t stream = nullptr; if (ctx.Attr("use_calc_stream")) { From 8922319c774377e2d5f1a554da7e15b39c09145e Mon Sep 17 00:00:00 2001 From: sandyhouse Date: Mon, 28 Mar 2022 18:03:06 +0800 Subject: [PATCH 10/17] update --- .../fluid/distributed/collective/HCCLTools.cc | 7 +++++++ .../distributed/collective/ProcessGroup.h | 2 +- .../collective/ProcessGroupHeter.cc | 19 +++++-------------- .../collective/ProcessGroupHeter.h | 4 ++-- 4 files changed, 15 insertions(+), 17 deletions(-) diff --git a/paddle/fluid/distributed/collective/HCCLTools.cc b/paddle/fluid/distributed/collective/HCCLTools.cc index c9f69447340db..526a683e057c0 100644 --- a/paddle/fluid/distributed/collective/HCCLTools.cc +++ b/paddle/fluid/distributed/collective/HCCLTools.cc @@ -13,6 +13,10 @@ // limitations under the License. #include "paddle/fluid/distributed/collective/HCCLTools.h" +#include "paddle/fluid/distributed/collective/Types.h" + +namespace paddle { +namespace distributed { HcclReduceOp ToHCCLRedType(ReduceOp reduction) { static const std::map red_type = { @@ -37,3 +41,6 @@ std::string SerializeHCCLUniqueId(const HcclRootInfo& hcclID) { } return oss.str(); } + +} // namespace distributed +} // namespace paddle diff --git a/paddle/fluid/distributed/collective/ProcessGroup.h b/paddle/fluid/distributed/collective/ProcessGroup.h index 4e9eb7b664c41..36a00a7d31758 100644 --- a/paddle/fluid/distributed/collective/ProcessGroup.h +++ b/paddle/fluid/distributed/collective/ProcessGroup.h @@ -92,7 +92,7 @@ class ProcessGroup { "ProcessGroup%s does not support broadcast", GetBackendName())); } - void Broadcast(const phi::DenseTensor* in, phi::DenseTensor* out) { + virtual void Broadcast(const phi::DenseTensor* in, phi::DenseTensor* out) { PADDLE_THROW(platform::errors::InvalidArgument( "ProcessGroup%s does not support broadcast for static", GetBackendName())); diff --git a/paddle/fluid/distributed/collective/ProcessGroupHeter.cc b/paddle/fluid/distributed/collective/ProcessGroupHeter.cc index e61c4c2f6410f..ffd653042494d 100644 --- a/paddle/fluid/distributed/collective/ProcessGroupHeter.cc +++ b/paddle/fluid/distributed/collective/ProcessGroupHeter.cc @@ -85,10 +85,6 @@ std::shared_ptr ProcessGroupHeter::AllReduce( PADDLE_ENFORCE_EQ( CheckTensorsInCudaPlace(tensors), true, platform::errors::InvalidArgument("All inputs should be in CudaPlace.")); -#elif defined(PADDLE_WITH_ASCEND_CL) - PADDLE_ENFORCE_EQ( - CheckTensorsInNPUPlace(tensors), true, - platform::errors::InvalidArgument("All inputs should be in NPUPlace.")); #endif // Step1: do allreduce in inner cluster @@ -143,10 +139,6 @@ std::shared_ptr ProcessGroupHeter::Broadcast( PADDLE_ENFORCE_EQ( CheckTensorsInCudaPlace(tensors), true, platform::errors::InvalidArgument("All inputs should be in CudaPlace.")); -#elif defined(PADDLE_WITH_ASCEND_CL) - PADDLE_ENFORCE_EQ( - CheckTensorsInNPUPlace(tensors), true, - 
platform::errors::InvalidArgument("All inputs should be in NPUPlace.")); #endif // Step1: do broadcast in inner cluster @@ -190,8 +182,6 @@ std::shared_ptr ProcessGroupHeter::Broadcast( void ProcessGroupHeter::Broadcast(const phi::DenseTensor* in, phi::DenseTensor* out) { // Step1: do broadcast in inner cluster - auto b_opts = BroadcastOptions(); - b_opts.source_root = 0; inner_pg_->Broadcast(in, out); if (local_rank_ == 0) { @@ -204,14 +194,15 @@ void ProcessGroupHeter::Broadcast(const phi::DenseTensor* in, if (with_switch_) { // TODO(sandyhouse) send to and recv } else { - auto gloo_task = inter_pg_->Broadcast(cpu_tensors, opts); - gloo_task->Wait(); + std::vector cpu_tensors = {cpu_tensor}; + // auto gloo_task = inter_pg_->Broadcast(cpu_tensors); + // gloo_task->Wait(); + inter_pg_->Broadcast(cpu_tensors); } framework::TensorCopySync(*dense_cpu_tensor, dense_cpu_tensor->place(), out); } - auto broadcast_task = inner_pg_->Broadcast(out, out); - broadcast_task->Wait(); + inner_pg_->Broadcast(out, out); } } // namespace distributed diff --git a/paddle/fluid/distributed/collective/ProcessGroupHeter.h b/paddle/fluid/distributed/collective/ProcessGroupHeter.h index e8290a0c87917..9b4cf3f2056da 100644 --- a/paddle/fluid/distributed/collective/ProcessGroupHeter.h +++ b/paddle/fluid/distributed/collective/ProcessGroupHeter.h @@ -23,8 +23,7 @@ #include "paddle/fluid/distributed/collective/ProcessGroup.h" #include "paddle/fluid/distributed/collective/ProcessGroupGloo.h" -#include "paddle/fluid/distributed/ps/service/heter_client.h" -#include "paddle/fluid/platform/cuda_device_guard.h" +// #include "paddle/fluid/distributed/ps/service/heter_client.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/distributed/store/store.h" @@ -36,6 +35,7 @@ #if defined(PADDLE_WITH_NCCL) #include "paddle/fluid/distributed/collective/NCCLTools.h" #include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h" +#include "paddle/fluid/platform/cuda_device_guard.h" #include "paddle/fluid/platform/dynload/nccl.h" #endif From e73ca14acc8aca6178324926b1f204440c80b902 Mon Sep 17 00:00:00 2001 From: sandyhouse Date: Mon, 28 Mar 2022 18:06:54 +0800 Subject: [PATCH 11/17] update --- paddle/fluid/distributed/collective/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/distributed/collective/CMakeLists.txt b/paddle/fluid/distributed/collective/CMakeLists.txt index 26c06f2ec20a3..79f3e7f1e86db 100644 --- a/paddle/fluid/distributed/collective/CMakeLists.txt +++ b/paddle/fluid/distributed/collective/CMakeLists.txt @@ -11,5 +11,5 @@ if(WITH_NCCL) endif() if(WITH_ASCEND_CL) cc_library(processgroup_hccl SRCS ProcessGroupHCCL.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi phi_api eager_api) - cc_library(processgroup_heter SRCS ProcessGroupHeter.cc HCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi phi_api eager_api) + cc_library(processgroup_heter SRCS ProcessGroupHeter.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi phi_api eager_api) endif() From d0179a53514ffbe84796c91411930678e175d6cc Mon Sep 17 00:00:00 2001 From: sandyhouse Date: Mon, 28 Mar 2022 18:56:26 +0800 Subject: [PATCH 12/17] update --- paddle/fluid/pybind/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 7b223f7ed27e2..69a928fc50d3d 100644 --- 
a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -91,12 +91,14 @@ if(NOT ON_INFER) set (PYBIND_DEPS ${PYBIND_DEPS} processgroup eager_reducer) if (WITH_NCCL) set (PYBIND_DEPS ${PYBIND_DEPS} processgroup_nccl) + set (PYBIND_DEPS ${PYBIND_DEPS} processgroup_heter) endif() if (WITH_GLOO) set (PYBIND_DEPS ${PYBIND_DEPS} processgroup_gloo) endif() if(WITH_ASCEND_CL) set (PYBIND_DEPS ${PYBIND_DEPS} processgroup_hccl) + set (PYBIND_DEPS ${PYBIND_DEPS} processgroup_heter) endif() set(PYBIND_SRCS ${PYBIND_SRCS} distributed_py.cc) endif() From f4bc987157948be7836c894d0ce32020b46c63bf Mon Sep 17 00:00:00 2001 From: sandyhouse Date: Tue, 29 Mar 2022 03:07:26 +0000 Subject: [PATCH 13/17] update --- paddle/fluid/distributed/collective/CMakeLists.txt | 8 ++++++-- paddle/fluid/distributed/collective/ProcessGroupHeter.h | 4 ++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/distributed/collective/CMakeLists.txt b/paddle/fluid/distributed/collective/CMakeLists.txt index 26c06f2ec20a3..d2002bb09e139 100644 --- a/paddle/fluid/distributed/collective/CMakeLists.txt +++ b/paddle/fluid/distributed/collective/CMakeLists.txt @@ -7,9 +7,13 @@ endif() if(WITH_NCCL) cc_library(processgroup_nccl SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi phi_api eager_api) - cc_library(processgroup_heter SRCS ProcessGroupHeter.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi phi_api eager_api) + if (WITH_DISTRIBUTE) + cc_library(processgroup_heter SRCS ProcessGroupHeter.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi phi_api eager_api) + endif() endif() if(WITH_ASCEND_CL) cc_library(processgroup_hccl SRCS ProcessGroupHCCL.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi phi_api eager_api) - cc_library(processgroup_heter SRCS ProcessGroupHeter.cc HCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi phi_api eager_api) + if (WITH_DISTRIBUTE) + cc_library(processgroup_heter SRCS ProcessGroupHeter.cc HCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi phi_api eager_api) + endif() endif() diff --git a/paddle/fluid/distributed/collective/ProcessGroupHeter.h b/paddle/fluid/distributed/collective/ProcessGroupHeter.h index e8290a0c87917..f813ab587b9fd 100644 --- a/paddle/fluid/distributed/collective/ProcessGroupHeter.h +++ b/paddle/fluid/distributed/collective/ProcessGroupHeter.h @@ -27,6 +27,10 @@ #include "paddle/fluid/platform/cuda_device_guard.h" #include "paddle/fluid/platform/device_context.h" +#ifdef PADDLE_WITH_GLOO +#include "paddle/fluid/framework/fleet/gloo_wrapper.h" +#endif + #include "paddle/fluid/distributed/store/store.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/gen_comm_id_helper.h" From 83c6fef6dab2a61334ed68dc2c48fa22dc30fa42 Mon Sep 17 00:00:00 2001 From: sandyhouse Date: Tue, 29 Mar 2022 03:17:51 +0000 Subject: [PATCH 14/17] update --- paddle/fluid/pybind/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 69a928fc50d3d..7b223f7ed27e2 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -91,14 +91,12 @@ if(NOT ON_INFER) set (PYBIND_DEPS ${PYBIND_DEPS} processgroup eager_reducer) if (WITH_NCCL) set (PYBIND_DEPS 
${PYBIND_DEPS} processgroup_nccl) - set (PYBIND_DEPS ${PYBIND_DEPS} processgroup_heter) endif() if (WITH_GLOO) set (PYBIND_DEPS ${PYBIND_DEPS} processgroup_gloo) endif() if(WITH_ASCEND_CL) set (PYBIND_DEPS ${PYBIND_DEPS} processgroup_hccl) - set (PYBIND_DEPS ${PYBIND_DEPS} processgroup_heter) endif() set(PYBIND_SRCS ${PYBIND_SRCS} distributed_py.cc) endif() From 0df9a69009800f3c39359e27e0bbb3f187ed5417 Mon Sep 17 00:00:00 2001 From: sandyhouse Date: Tue, 29 Mar 2022 03:30:39 +0000 Subject: [PATCH 15/17] update --- paddle/fluid/distributed/collective/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/distributed/collective/CMakeLists.txt b/paddle/fluid/distributed/collective/CMakeLists.txt index a2059a11c3afb..6fb805a72e4de 100644 --- a/paddle/fluid/distributed/collective/CMakeLists.txt +++ b/paddle/fluid/distributed/collective/CMakeLists.txt @@ -15,6 +15,6 @@ endif() if(WITH_ASCEND_CL) cc_library(processgroup_hccl SRCS ProcessGroupHCCL.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi phi_api eager_api) if (WITH_DISTRIBUTE) - cc_library(processgroup_heter SRCS ProcessGroupHeter.cc HCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi phi_api eager_api) + cc_library(processgroup_heter SRCS ProcessGroupHeter.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi phi_api eager_api) endif() endif() From 4f0ff5d9431d52c9bd13bce07e65fd1244d56e44 Mon Sep 17 00:00:00 2001 From: sandyhouse Date: Tue, 29 Mar 2022 05:01:57 +0000 Subject: [PATCH 16/17] update --- paddle/fluid/pybind/distributed_py.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/pybind/distributed_py.cc b/paddle/fluid/pybind/distributed_py.cc index 9f71001793da3..d74b11c069fbb 100644 --- a/paddle/fluid/pybind/distributed_py.cc +++ b/paddle/fluid/pybind/distributed_py.cc @@ -256,7 +256,7 @@ void BindDistributed(py::module *m) { gid, opts); }), py::arg("store"), py::arg("rank"), py::arg("world_size"), - py::arg("group_id"), py::call_guard()) + py::arg("group_id") = 0, py::call_guard()) .def_static("create_default_device", &ProcessGroupGloo::createDefaultDevice); #endif From 3ef390b5499747d20cdee3bdc00e0f74100d6b4d Mon Sep 17 00:00:00 2001 From: sandyhouse Date: Wed, 30 Mar 2022 04:31:47 +0000 Subject: [PATCH 17/17] update --- paddle/fluid/pybind/distributed_py.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/pybind/distributed_py.cc b/paddle/fluid/pybind/distributed_py.cc index d74b11c069fbb..6c74ea2eef4d0 100644 --- a/paddle/fluid/pybind/distributed_py.cc +++ b/paddle/fluid/pybind/distributed_py.cc @@ -215,7 +215,8 @@ void BindDistributed(py::module *m) { *m, "ProcessGroupNCCL", ProcessGroup) .def(py::init &, int, int, int>(), - py::call_guard()); + py::arg("store"), py::arg("rank"), py::arg("world_size"), + py::arg("group_id") = 0, py::call_guard()); #endif #if defined(PADDLE_WITH_ASCEND_CL) @@ -224,7 +225,8 @@ void BindDistributed(py::module *m) { *m, "ProcessGroupHCCL", ProcessGroup) .def(py::init &, int, int, int>(), - py::call_guard()); + py::arg("store"), py::arg("rank"), py::arg("world_size"), + py::arg("group_id") = 0, py::call_guard()); #endif py::class_