Merged
Changes from all commits
40 commits
b7d04aa
Merge pull request #230 from PaddlePaddle/develop
HydrogenSulfate May 10, 2024
2fd9dc0
Merge branch 'develop' of https://github.com/HydrogenSulfate/Paddle i…
HydrogenSulfate May 10, 2024
4c5afe2
Merge branch 'develop' of https://github.com/HydrogenSulfate/Paddle i…
HydrogenSulfate May 15, 2024
056d19b
Merge branch 'develop' of https://github.com/HydrogenSulfate/Paddle i…
HydrogenSulfate May 15, 2024
c022e44
Merge branch 'develop' of https://github.com/HydrogenSulfate/Paddle i…
HydrogenSulfate May 31, 2024
d723c27
Merge branch 'develop' of https://github.com/HydrogenSulfate/Paddle i…
HydrogenSulfate Jun 6, 2024
04664b8
Merge branch 'develop' of https://github.com/HydrogenSulfate/Paddle i…
HydrogenSulfate Jun 6, 2024
2f2777c
Merge branch 'develop' of https://github.com/HydrogenSulfate/Paddle i…
HydrogenSulfate Jun 19, 2024
36efc60
Merge branch 'develop' of https://github.com/HydrogenSulfate/Paddle i…
HydrogenSulfate Jul 4, 2024
6d3d314
Merge pull request #268 from PaddlePaddle/develop
HydrogenSulfate Jul 4, 2024
8eed6d0
Merge branch 'develop' of https://github.com/HydrogenSulfate/Paddle i…
HydrogenSulfate Jul 4, 2024
f6815d3
Merge branch 'develop' of https://github.com/HydrogenSulfate/Paddle i…
HydrogenSulfate Jul 12, 2024
1b3a43b
Merge branch 'develop' of https://github.com/HydrogenSulfate/Paddle i…
HydrogenSulfate Jul 16, 2024
9550534
Merge branch 'develop' of https://github.com/HydrogenSulfate/Paddle i…
HydrogenSulfate Jul 22, 2024
0053ffb
Merge branch 'develop' of https://github.com/HydrogenSulfate/Paddle i…
HydrogenSulfate Jul 24, 2024
928d668
Merge branch 'develop' of https://github.com/HydrogenSulfate/Paddle i…
HydrogenSulfate Jul 24, 2024
2cb8a3c
support shared memory via dlpack tensor format
HydrogenSulfate Sep 2, 2024
c0d6fd4
remove deleter
HydrogenSulfate Sep 2, 2024
e2b0d9c
update todlpack code
HydrogenSulfate Sep 9, 2024
1047fc9
Merge branch 'develop_new' into support_dlpack
HydrogenSulfate Sep 9, 2024
31f918c
fix for other dtype
HydrogenSulfate Sep 10, 2024
09336d4
add type check for dlpack
HydrogenSulfate Sep 10, 2024
10a34a0
add data_ptr consistency unitest
HydrogenSulfate Sep 10, 2024
a18afea
remove cuda_runtime.h and remove redundant annotation
HydrogenSulfate Sep 10, 2024
b65b74c
update ref doc
HydrogenSulfate Sep 10, 2024
d8cadb4
restore
HydrogenSulfate Sep 10, 2024
3db178a
use map and mutex instead of std::function
HydrogenSulfate Sep 11, 2024
79cf661
polish annotation
HydrogenSulfate Sep 11, 2024
342768b
fix for cpu tensor
HydrogenSulfate Sep 11, 2024
b5d777b
add data_ptr consistency unitest
HydrogenSulfate Sep 11, 2024
72e85f9
update thirdpary/dlpack to v0.8 to support Tensor of bool dtype
HydrogenSulfate Sep 12, 2024
f64e0ce
move std::is_same<T, bool> before std::is_unsigned<T>
HydrogenSulfate Sep 12, 2024
5f8c202
restore overloaded version of TensorFromDLPack
HydrogenSulfate Sep 12, 2024
3169050
restore overload version of GetDstPtrByDLDataType and update dlpack d…
HydrogenSulfate Sep 12, 2024
3783f74
fix unitest
HydrogenSulfate Sep 13, 2024
741f078
reduce matrix size and loop time in test_dlpack
HydrogenSulfate Sep 13, 2024
de95c8a
support directly converting from object that has '__dlpack__' attribute
HydrogenSulfate Sep 15, 2024
1eecb4e
Merge branch 'develop' into support_dlpack
HydrogenSulfate Sep 15, 2024
1163d33
support CUDAPinnedPlace, add unitest for place consistency check, rem…
HydrogenSulfate Sep 18, 2024
2dc07e1
Merge branch 'develop' into support_dlpack
HydrogenSulfate Sep 18, 2024
2 changes: 1 addition & 1 deletion cmake/external/dlpack.cmake
@@ -15,7 +15,7 @@
include(ExternalProject)

set(DLPACK_PREFIX_DIR ${THIRD_PARTY_PATH}/dlpack)
set(DLPACK_TAG v0.4)
set(DLPACK_TAG v0.8)
set(DLPACK_INCLUDE_DIR ${THIRD_PARTY_PATH}/dlpack/src/extern_dlpack/include)
set(SOURCE_DIR ${PADDLE_SOURCE_DIR}/third_party/dlpack)
include_directories(${SOURCE_DIR}/include)
50 changes: 28 additions & 22 deletions paddle/fluid/framework/dlpack_tensor.cc
@@ -33,6 +33,10 @@ static ::DLDataType GetDLDataTypeCode() {
} else if (std::is_same<T, phi::dtype::float16>::value ||
std::is_floating_point<T>::value) {
dtype.code = kDLFloat;
} else if (std::is_same<T, bool>::value) {
// Since std::is_unsigned<bool>::value is true,
// it is necessary to evaluate bool before std::is_unsigned.
dtype.code = kDLBool;
} else if (std::is_unsigned<T>::value) {
dtype.code = kDLUInt;
} else if (std::is_integral<T>::value) {
@@ -99,7 +103,7 @@ struct DLDeviceVisitor {
inline ::DLDevice operator()(const phi::GPUPlace &place) const {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
::DLDevice device;
device.device_type = kDLGPU;
device.device_type = kDLCUDA;
device.device_id = place.device; // NOLINT
return device;
#else
@@ -111,7 +115,7 @@
inline ::DLDevice operator()(const phi::GPUPinnedPlace &place) const {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
::DLDevice device;
device.device_type = kDLCPUPinned;
device.device_type = kDLCUDAHost;
device.device_id = 0;
return device;
#else
@@ -125,52 +129,54 @@
struct PaddleDLMTensor {
phi::DenseTensor handle;
DLManagedTensor tensor;
PaddleDLMTensor() : tensor() {}
};

void deleter(DLManagedTensor *arg) {
delete[] arg->dl_tensor.shape;
delete[] arg->dl_tensor.strides;
delete static_cast<PaddleDLMTensor *>(arg->manager_ctx);
static void deleter(DLManagedTensor *self) {
if (self && self->manager_ctx) {
delete[] self->dl_tensor
.shape; // delete shape allocated in toDLPack manually
delete[] self->dl_tensor
.strides; // delete strides allocated in toDLPack manually
delete static_cast<PaddleDLMTensor *>(self->manager_ctx);
}
}

DLManagedTensor *toDLPack(const phi::DenseTensor &src) {
PaddleDLMTensor *pdDLMTensor(new PaddleDLMTensor);
pdDLMTensor->handle = const_cast<phi::DenseTensor &>(src);
pdDLMTensor->tensor.manager_ctx = pdDLMTensor;
pdDLMTensor->tensor.deleter = &deleter;
pdDLMTensor->tensor.dl_tensor.data = const_cast<void *>(src.data());

// init ndim
using DimType = decltype(pdDLMTensor->tensor.dl_tensor.ndim); // int
pdDLMTensor->tensor.dl_tensor.ndim = static_cast<DimType>(src.dims().size());
using DimType = decltype(pdDLMTensor->tensor.dl_tensor.ndim); // int32_t
auto _shape = src.dims();
pdDLMTensor->tensor.dl_tensor.ndim = static_cast<DimType>(_shape.size());
DimType ndim = pdDLMTensor->tensor.dl_tensor.ndim;

// init shape
auto shape = new int64_t[ndim];
int64_t *shape = new int64_t[ndim];
for (DimType i = 0; i < ndim; ++i) {
shape[i] = src.dims()[i];
shape[i] = _shape[i];
}
pdDLMTensor->tensor.dl_tensor.shape = shape;

// init stride
auto strides = new int64_t[ndim];
for (DimType i = 0; i < ndim; ++i) {
strides[i] = 1;
}
for (DimType i = ndim - 2; i >= 0; --i) {
strides[i] = shape[i + 1] * strides[i + 1];
// init strides
auto _strides = src.strides();
int64_t *strides = new int64_t[ndim];
for (int i = 0; i < src.dims().size(); i++) {
strides[i] = _strides[i];
if (shape[i] < 2) {
strides[i] = 1;
}
Contributor:

Is this change equivalent to the original?

@HydrogenSulfate (Contributor Author), Sep 19, 2024:

> Is this change equivalent to the original?

The original strides computation appears to be wrong: it ignored the case where x is non-contiguous and derived the strides directly from the shape, which forces the converted DLPack tensor to always be contiguous. Following PyTorch's approach, the source tensor's own strides should be used directly: https://github.com/pytorch/pytorch/blob/db80b98ec460ca5b2fd84c1dfb6426925f64c8cc/aten/src/ATen/DLConvertor.cpp#L267-L276

Based on your comment I tested the strides before and after conversion in this PR, and from_dlpack still seems to mishandle strides in some cases; I need to revise it further and add a strides unit test.
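To make the contiguity point concrete, below is a minimal standalone C++ sketch (not part of this PR, with hypothetical shape and stride values) comparing strides recomputed from the shape, as the old loop did, against the actual strides of a transposed view:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // A 4x3 view obtained by transposing a contiguous 3x4 tensor.
  std::vector<int64_t> shape = {4, 3};
  std::vector<int64_t> view_strides = {1, 4};  // real strides of the transposed view

  // Old approach: derive row-major (contiguous) strides from the shape alone.
  std::vector<int64_t> computed(shape.size(), 1);
  for (int i = static_cast<int>(shape.size()) - 2; i >= 0; --i) {
    computed[i] = shape[i + 1] * computed[i + 1];
  }

  // Prints "computed 3,1 vs actual 1,4": exporting the recomputed strides
  // would describe a different, contiguous tensor rather than the view.
  std::cout << "computed " << computed[0] << "," << computed[1]
            << " vs actual " << view_strides[0] << "," << view_strides[1]
            << std::endl;
  return 0;
}
```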

}
pdDLMTensor->tensor.dl_tensor.strides = strides;

// init device, DLDevice type with device_type and device_id
pdDLMTensor->tensor.dl_tensor.data = const_cast<void *>(src.data());
auto place = src.place();
pdDLMTensor->tensor.dl_tensor.device =
phi::VisitPlace(place, internal::DLDeviceVisitor());

pdDLMTensor->tensor.dl_tensor.dtype = internal::GetDLDataTypeFromTypeIndex(
framework::TransToProtoVarType(src.dtype()));

pdDLMTensor->tensor.dl_tensor.byte_offset = 0;
return &(pdDLMTensor->tensor);
}
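For context, a minimal sketch (not part of this PR) of the DLPack producer/consumer contract that the deleter above supports; it assumes a valid phi::DenseTensor `src` and that toDLPack lives in paddle::framework as shown in this diff:

```cpp
// Illustrative only: `src` is an existing phi::DenseTensor.
DLManagedTensor* managed = paddle::framework::toDLPack(src);

// The consumer reads managed->dl_tensor.data / shape / strides, and then,
// per the DLPack contract, calls the deleter exactly once when finished,
// which frees the shape/strides arrays and the PaddleDLMTensor wrapper.
if (managed->deleter != nullptr) {
  managed->deleter(managed);
}
```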
195 changes: 154 additions & 41 deletions paddle/fluid/framework/tensor_util.cc
@@ -15,6 +15,7 @@ limitations under the License. */
#include "paddle/fluid/framework/tensor_util.h"

#include <algorithm>
#include <functional>
#include <limits>
#include <memory>
#include <string>
@@ -793,6 +794,158 @@ void* GetDstPtrByDLDataType(DLDataType type,
}
}

// get Tensor data dtype from given DLDataType
phi::DataType GetDstPtrByDLDataType(DLDataType type) {
// vector types not currently supported
PADDLE_ENFORCE_LE(
type.lanes,
1,
common::errors::Unimplemented("Vector type is not supported currently."));

switch (type.bits) {
case 8:
if (type.code == kDLBool) return phi::DataType::BOOL;
if (type.code == kDLInt) return phi::DataType::INT8;
if (type.code == kDLUInt) return phi::DataType::UINT8;
PADDLE_THROW(common::errors::Unimplemented(
"DLDataType code <%d> is illegal when DLDataType.bits is <%d>.",
type.code,
type.bits));
case 16:
if (type.code == kDLInt) return phi::DataType::INT16;
if (type.code == kDLFloat) return phi::DataType::FLOAT16;
if (type.code == kDLBfloat) return phi::DataType::BFLOAT16;
PADDLE_THROW(common::errors::Unimplemented(
"DLDataType code <%d> is illegal when DLDataType.bits is <%d>.",
type.code,
type.bits));
case 32:
if (type.code == kDLInt) return phi::DataType::INT32;
if (type.code == kDLFloat) return phi::DataType::FLOAT32;
PADDLE_THROW(common::errors::Unimplemented(
"DLDataType code <%d> is illegal when DLDataType.bits is <%d>.",
type.code,
type.bits));
case 64:
if (type.code == kDLInt) return phi::DataType::INT64;
if (type.code == kDLFloat) return phi::DataType::FLOAT64;
if (type.code == kDLComplex) return phi::DataType::COMPLEX64;
PADDLE_THROW(common::errors::Unimplemented(
"DLDataType code <%d> is illegal when DLDataType.bits is <%d>.",
type.code,
type.bits));
case 128:
if (type.code == kDLComplex) return phi::DataType::COMPLEX128;
PADDLE_THROW(common::errors::Unimplemented(
"DLDataType code <%d> is illegal when DLDataType.bits is <%d>.",
type.code,
type.bits));
default:
PADDLE_THROW(common::errors::Unimplemented(
"Unsupported DLDataType.bits %d.", type.bits));
}
}

/*
dlpack related code ref:
https://github.com/pytorch/pytorch/blob/main/aten/src/ATen/DLConvertor.cpp
and paddle/phi/api/lib/tensor_utils.cc
*/
using Deleter = std::function<void(void*)>;

std::unordered_map<void*, std::function<void(phi::Allocation*)>> ptr_to_deleter;
std::mutex ptr_to_deleter_mutex; // use mutex to keep thread safe
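// NOTE: phi::Allocation's DeleterFnPtr is a plain function pointer and cannot
// capture per-tensor state, so the stateful std::function deleter is stored in
// the map above, keyed by the data pointer, and DeleterBridge below looks it
// up (and erases it) when the allocation is destroyed.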

void DeleterBridge(phi::Allocation* alloc) {
std::lock_guard<std::mutex> lock(ptr_to_deleter_mutex);
auto it = ptr_to_deleter.find(static_cast<void*>(alloc->ptr()));
if (it != ptr_to_deleter.end()) {
it->second(alloc); // call the deleter
ptr_to_deleter.erase(it); // remove the entry from the map safely
}
}

phi::DenseTensor from_blob(void* data,
DLManagedTensor* src,
const phi::DDim& shape,
phi::DataType dtype,
phi::DataLayout layout,
const phi::Place& place,
const Deleter& deleter) {
PADDLE_ENFORCE_NOT_NULL(
data, phi::errors::InvalidArgument("data can not be nullptr."));

auto meta = phi::DenseTensorMeta(dtype, shape, layout);
size_t size = SizeOf(dtype) * (meta.is_scalar ? 1 : product(meta.dims));
phi::Allocation::DeleterFnPtr f = nullptr;

if (deleter) {
auto g = [deleter, src](phi::Allocation* p) {
if (src->manager_ctx) {
deleter(src);
}
};

{
std::lock_guard<std::mutex> lock(ptr_to_deleter_mutex);
ptr_to_deleter[data] = g;
}

f = DeleterBridge;
}

auto alloc = std::make_shared<phi::Allocation>(data, size, f, place);
return phi::DenseTensor(alloc, meta);
}

phi::DenseTensor TensorFromDLPack(DLManagedTensor* src, Deleter deleter) {
std::vector<int64_t> vec;
std::copy(src->dl_tensor.shape,
src->dl_tensor.shape + src->dl_tensor.ndim,
std::back_inserter(vec));

phi::Place place;
if (src->dl_tensor.device.device_type == kDLCPU) {
place = phi::CPUPlace();
} else if (src->dl_tensor.device.device_type == kDLCUDA) {
place = phi::GPUPlace();
} else if (src->dl_tensor.device.device_type == kDLCUDAHost) {
place = phi::GPUPinnedPlace();
} else {
PADDLE_THROW(phi::errors::Unimplemented("Given Place is not supported"));
}

::DLDataType type = src->dl_tensor.dtype;
auto dtype = GetDstPtrByDLDataType(type);
if (!src->dl_tensor.strides) {
return from_blob(src->dl_tensor.data,
src,
common::make_ddim(vec),
dtype,
phi::DataLayout::NCHW,
place,
std::move(deleter));
} else {
return from_blob(src->dl_tensor.data,
src,
common::make_ddim(vec),
dtype,
phi::DataLayout::NCHW,
place,
deleter);
}
}

phi::DenseTensor TensorFromDLPack(DLManagedTensor* src) {
auto deleter = [src](void* self [[maybe_unused]]) {
if (src->deleter) {
src->deleter(src);
}
};
return TensorFromDLPack(src, std::move(deleter));
}

// Keep this overloaded version of the interface unchanged.
void TensorFromDLPack(const ::DLTensor& dl_tensor, phi::DenseTensor* dst) {
phi::CPUPlace dst_place = phi::CPUPlace();
phi::CPUPlace src_place = phi::CPUPlace();
@@ -815,7 +968,7 @@ void TensorFromDLPack(const ::DLTensor& dl_tensor, phi::DenseTensor* dst) {
memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if (dl_tensor.device.device_type == kDLGPU) {
if (dl_tensor.device.device_type == kDLCUDA) {
phi::GPUPlace dst_place = phi::GPUPlace(dl_tensor.device.device_id);
phi::GPUPlace src_place = phi::GPUPlace(dl_tensor.device.device_id);
dst_ptr = GetDstPtrByDLDataType(type, dst, dst_place);
@@ -833,46 +986,6 @@ void TensorFromDLPack(const ::DLTensor& dl_tensor, phi::DenseTensor* dst) {
#endif
}

void TensorFromDLPack(const DLManagedTensor* src, phi::DenseTensor* dst) {
std::vector<int64_t> vec;
std::copy(src->dl_tensor.shape,
src->dl_tensor.shape + src->dl_tensor.ndim,
std::back_inserter(vec));

phi::DDim vddim = common::make_ddim(vec);
dst->Resize(vddim);
::DLDataType type = src->dl_tensor.dtype;

auto src_ptr = static_cast<const void*>(src->dl_tensor.data);
auto size = common::product(vddim) * type.bits / 8;

if (src->dl_tensor.device.device_type == kDLCPU) {
phi::CPUPlace dst_place = phi::CPUPlace();
phi::CPUPlace src_place = phi::CPUPlace();
void* dst_ptr = GetDstPtrByDLDataType(type, dst, dst_place);
memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if (src->dl_tensor.device.device_type == kDLGPU) {
phi::GPUPlace dst_place = phi::GPUPlace(src->dl_tensor.device.device_id);
phi::GPUPlace src_place = phi::GPUPlace(src->dl_tensor.device.device_id);
void* dst_ptr = GetDstPtrByDLDataType(type, dst, dst_place);
auto* ctx = phi::DeviceContextPool::Instance().GetByPlace(dst_place);
// Fix copy by share allocation.
memory::Copy(dst_place,
dst_ptr,
src_place,
src_ptr,
size,
reinterpret_cast<const phi::GPUContext&>(*ctx).stream());
}
#endif
src->deleter(const_cast<DLManagedTensor*>(src));
#ifdef PADDLE_WITH_XPU
PADDLE_THROW(common::errors::Unimplemented("XPUPlace is not supported"));
#endif
}

template <typename T>
std::string format_tensor(const phi::DenseTensor& tensor) {
// TODO(zhiqiu): use the print option to format tensor.
9 changes: 7 additions & 2 deletions paddle/fluid/framework/tensor_util.h
@@ -106,11 +106,16 @@ template <typename T>
void TensorToVector(const phi::DenseTensor& src, std::vector<T>* dst);

// convert dlpack's DLTensor to tensor

TEST_API void TensorFromDLPack(const ::DLTensor& dl_tensor,
phi::DenseTensor* dst);
void TensorFromDLPack(const DLManagedTensor* src, phi::DenseTensor* dst);

TEST_API phi::DenseTensor TensorFromDLPack(DLManagedTensor* src);
inline phi::DenseTensor TensorFromDLPack(const DLManagedTensor* src) {
return TensorFromDLPack(const_cast<DLManagedTensor*>(src));
}

phi::DenseTensor TensorFromDLPack(DLManagedTensor* src,
std::function<void(void*)> deleter);
//
// The implementation of template functions.
//
30 changes: 12 additions & 18 deletions paddle/fluid/pybind/pybind.cc
@@ -1264,29 +1264,23 @@ PYBIND11_MODULE(libpaddle, m) {
phi::DeviceContextPool::Instance().Get(place)->Wait();
});

m.def("from_dlpack", [](py::capsule *dltensor) {
DLManagedTensor *dmt = reinterpret_cast<DLManagedTensor *>(
PyCapsule_GetPointer(dltensor->ptr(), "dltensor"));
m.def("from_dlpack", [](py::object data) {
DLManagedTensor *dlMTensor = reinterpret_cast<DLManagedTensor *>(
PyCapsule_GetPointer(data.ptr(), "dltensor"));

PADDLE_ENFORCE_NOT_NULL(
dmt,
common::errors::InvalidArgument(
dlMTensor,
phi::errors::InvalidArgument(
"from_dlpack received an invalid capsule. "
"Note that a DLPack tensor can be consumed only once."));
"Note that DLTensor capsules can be consumed only once, "
"so you might have already constructed a tensor from it once."));

PyCapsule_SetName(dltensor->ptr(), "used_dltensor");
DLTensor dl = dmt->dl_tensor;
phi::DenseTensor tensor;
// NOTE: Might meet bugged numpy version, see:
// https://github.com/pytorch/pytorch/blob/main/torch/csrc/utils/tensor_new.cpp#L1636-L1638
auto ptensor = paddle::framework::TensorFromDLPack(dlMTensor);

if (dl.device.device_type == kDLCPU) {
paddle::framework::TensorFromDLPack(dmt, &tensor);
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if (dl.device.device_type == kDLGPU) {
paddle::framework::TensorFromDLPack(dmt, &tensor);
}
#endif
return tensor;
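// NOTE: renaming the capsule marks it as consumed, so a second from_dlpack
// call on the same capsule fails with the InvalidArgument error above instead
// of taking ownership of the DLManagedTensor twice.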
PyCapsule_SetName(data.ptr(), "used_dltensor");
return ptensor;
});

m.def("_create_loaded_parameter",