From 6246289f26beb12e4b02de9e7f4b1b08d422ab21 Mon Sep 17 00:00:00 2001
From: hellozmz <407190054@qq.com>
Date: Tue, 30 Jul 2024 17:39:04 +0800
Subject: [PATCH] [Ascend] use aclnnUnique2 impl unique (#1314)

---------

Co-authored-by: hellozmz <40719054@qq.com>
---
Review notes (text between "---" and the first "diff --git" is not part of the commit message):
 - unique.cpp: release the aclGetViewShape buffer with delete[] after every query, copy the
   inverse indices into the caller's tensor instead of assigning to the by-value handle,
   keep the convertParams holder in a named variable, drop the unreachable second dim check.
 - adaptor.hpp: keep the "do { ... } while (false)" macros free of a trailing ';' and add the
   missing ';' to the nested call inside DIOPI_ASCEND_CALL_ACLNN_SYNC instead.
 - The hunks for utils.cpp, utils.hpp, CMakeLists.txt and ascend_config.yaml are reproduced as
   received. Template arguments and header names missing there (bare "#include",
   "std::vector&") were already absent from the text under review; "index" lines are unchanged.

 impl/ascend/aclnn/adaptor.hpp      |   4 +-
 impl/ascend/common/utils.cpp       |  18 ++++
 impl/ascend/common/utils.hpp       |   2 +
 impl/ascend/device_configs.py      |  20 +---
 impl/ascend/functions/unique.cpp   | 123 ++++++++++++++++++++++++++++++
 impl/ascend_npu/CMakeLists.txt     |   1 +
 impl/ascend_npu/ascend_config.yaml |   1 +
 7 files changed, 154 insertions(+), 15 deletions(-)
 create mode 100644 impl/ascend/functions/unique.cpp

diff --git a/impl/ascend/aclnn/adaptor.hpp b/impl/ascend/aclnn/adaptor.hpp
index 0d67d4093..117423c78 100644
--- a/impl/ascend/aclnn/adaptor.hpp
+++ b/impl/ascend/aclnn/adaptor.hpp
@@ -380,9 +380,11 @@ void callAclnnImpl(diopiContextHandle_t ctx, const std::tuple& tuple) {
     } while (false)
 
+// The call macros end in "while (false)" without ';' so that the call site supplies it and
+// "if (cond) MACRO(...); else ..." keeps working. Nested uses must add the ';' themselves.
 #define DIOPI_ASCEND_CALL_ACLNN_SYNC(api, ctx, ...) \
     do { \
         auto convertedParams = ::impl::ascend::aclnn_adaptor::convertParams(__VA_ARGS__); \
-        DIOPI_ASECND_CALL_ACLNN_TYPE_SYNC(api, ctx, convertedParams.params()) \
+        DIOPI_ASECND_CALL_ACLNN_TYPE_SYNC(api, ctx, convertedParams.params()); \
     } while (false)
 
 #endif  // IMPL_ASCEND_ACLNN_ADAPTOR_HPP_
diff --git a/impl/ascend/common/utils.cpp b/impl/ascend/common/utils.cpp
index 5ba21de71..fe9da2c0e 100644
--- a/impl/ascend/common/utils.cpp
+++ b/impl/ascend/common/utils.cpp
@@ -7,6 +7,7 @@
 #include "utils.hpp"
 
 #include
+#include
 #include
 #include
 #include
@@ -15,6 +16,7 @@
 #include
 #include
 
+#include "../aclnn/adaptor.hpp"
 #include "../ascend_tensor.hpp"
 #include "acloprunner.hpp"
 
@@ -186,6 +188,22 @@ diopiError_t reshape(diopiContextHandle_t ctx, const AscendTensor& src, AscendTe
     return diopiSuccess;
 }
 
+AscendTensor reshape(diopiContextHandle_t ctx, const AscendTensor& src, const std::vector& shape) {
+    ASCEND_CHECK_ABORT(src.defined(), "input tensor is nullptr.");
+
+    // if shape is the same as src, return src directly.
+    if (src.shape() == shape) {
+        return src;
+    }
+
+    // if shape is not the same as src, create a new tensor, then copy the data from src to the new tensor.
+    AscendTensor result, srcCopy(src);
+    makeTensor(ctx, result, shape, srcCopy.dtype());
+    DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceCopy, ctx, result, srcCopy.view(shape));
+
+    return AscendTensor(result.tensorHandle());
+}
+
 diopiError_t aclAsStridedCore(diopiContextHandle_t ctx, const AscendTensor& src, AscendTensor& dst) {
     diopiTensorHandle_t targetObj = const_cast(static_cast(dst));
     AclOpRunner<4, 1>("AsStrided", ctx)
diff --git a/impl/ascend/common/utils.hpp b/impl/ascend/common/utils.hpp
index 80b1ce056..05314907d 100644
--- a/impl/ascend/common/utils.hpp
+++ b/impl/ascend/common/utils.hpp
@@ -89,6 +89,8 @@ diopiError_t makeTensorFromScalar(diopiContextHandle_t ctx, AscendTensor& dst, c
 
 diopiError_t reshape(diopiContextHandle_t ctx, const AscendTensor& src, AscendTensor& dst, const std::vector& shape);
 
+AscendTensor reshape(diopiContextHandle_t ctx, const AscendTensor& src, const std::vector& shape);
+
 diopiError_t contiguous(diopiContextHandle_t ctx, const AscendTensor& src, AscendTensor& dst, diopiMemoryFormat_t format = diopiMemoryFormat_t::Contiguous);
 
 diopiError_t castTensor(diopiContextHandle_t ctx, const AscendTensor& src, AscendTensor& dst);
diff --git a/impl/ascend/device_configs.py b/impl/ascend/device_configs.py
index 441352a02..1377c420e 100755
--- a/impl/ascend/device_configs.py
+++ b/impl/ascend/device_configs.py
@@ -795,25 +795,17 @@
 
     'unique': dict(
         name=['unique'],
-        tensor_para=dict(
-            args=[
-                {
-                    "ins": ['input'],
-                    "dtype": [Skip(np.int64),Skip(np.float32),Skip(np.float64),Skip(np.float16),Skip(np.int16),Skip(np.int32),Skip(np.uint8),Skip(np.int8),Skip(np.bool_),],
-                },
-            ]
+        para=dict(
+            # aclnnUnique2 only supports dim=None, so the cases that pass an explicit dim are skipped
+            dim=[Skip(-2), Skip(-1), Skip(0), Skip(1), Skip(2)],
         ),
     ),
 
     'unique_same_value': dict(
         name=['unique'],
-        tensor_para=dict(
-            args=[
-                {
-                    "ins": ['input'],
-                    "dtype": [Skip(np.int64),Skip(np.float32),Skip(np.float64),Skip(np.float16),Skip(np.int16),Skip(np.int32),Skip(np.uint8),Skip(np.int8),Skip(np.bool_),],
-                },
-            ]
+        para=dict(
+            # aclnnUnique2 only supports dim=None, so the cases that pass an explicit dim are skipped
+            dim=[Skip(-1), Skip(1)],
         ),
     ),
 
diff --git a/impl/ascend/functions/unique.cpp b/impl/ascend/functions/unique.cpp
new file mode 100644
index 000000000..3b1f87ebe
--- /dev/null
+++ b/impl/ascend/functions/unique.cpp
@@ -0,0 +1,123 @@
+/**
+ * @file
+ * @author DeepLink
+ * @copyright (c) 2024, DeepLink.
+ */
+
+#include <cstdint>
+#include <tuple>
+#include <vector>
+
+#include "../aclnn/adaptor.hpp"
+#include "../common/utils.hpp"
+
+namespace impl {
+namespace ascend {
+
+namespace {
+
+/**
+ * @brief Read the view shape that aclGetViewShape reports for an aclTensor.
+ * @param[in] tensor tensor to query.
+ * @param[out] shape replaced by the reported dims on success, left empty otherwise.
+ * @return the aclGetViewShape status (0 is treated as success), or -1 if the symbol cannot be resolved.
+ */
+int getViewShape(const aclTensor* tensor, std::vector<int64_t>& shape) {
+    using aclGetViewShapeFunc = int (*)(const aclTensor* tensor, int64_t** viewDims, uint64_t* viewDimsNum);
+    static aclGetViewShapeFunc aclGetViewShape = reinterpret_cast<aclGetViewShapeFunc>(impl::ascend::aclnn_adaptor::getOpApiFuncAddr("aclGetViewShape"));
+
+    shape.clear();
+    if (aclGetViewShape == nullptr) {
+        return -1;
+    }
+
+    int64_t* viewDims = nullptr;
+    uint64_t viewDimNum = 0;
+    const int ret = aclGetViewShape(tensor, &viewDims, &viewDimNum);
+    if (ret == 0 && viewDims != nullptr) {
+        shape.assign(viewDims, viewDims + viewDimNum);
+    }
+
+    // viewDims is handed back as an array of viewDimNum entries that the caller has to free, hence delete[] rather than delete.
+    // Freeing it here, once per query, also keeps a second query from overwriting (and leaking) the first buffer.
+    delete[] viewDims;
+    return ret;
+}
+
+}  // namespace
+
+/**
+ * @brief Compute unique values with aclnnUnique2 (only the dim == nullptr form is supported).
+ * @param[in] ctx diopi context.
+ * @param[out] out receives a tensor created here that holds the unique values.
+ * @param[in] input input tensor.
+ * @param[in] dim must be nullptr; any other value trips ASCEND_CHECK_ABORT.
+ * @param[in] sorted forwarded to aclnnUnique2.
+ * @param[in] returnCounts whether *counts is produced.
+ * @param[out] indices if not nullptr, the inverse indices are copied into this tensor.
+ * @param[out] counts receives a tensor created here when returnCounts is true; untouched otherwise.
+ * @return diopiSuccess.
+ */
+diopiError_t diopiUnique(diopiContextHandle_t ctx, diopiTensorHandle_t* out, diopiConstTensorHandle_t input, const int64_t* dim, bool sorted, bool returnCounts,
+                         diopiTensorHandle_t indices, diopiTensorHandle_t* counts) {
+    // aclnnUnique2 only supports when dim is nullptr. If dim is not nullptr, aclnnUniqueDim should be used.
+    ASCEND_CHECK_ABORT(dim == nullptr, "dim is not supported in aclnnUnique2");
+
+    AscendTensor inputAt(input);
+    const std::vector<int64_t> flatShape = {inputAt.numel()};
+    const std::vector<int64_t> zeroShape = {0};
+
+    // allocate temp out tensor with one slot per input element; its real shape is queried after the call
+    AscendTensor outTmpAt;
+    makeTensor(ctx, outTmpAt, flatShape, inputAt.dtype());
+
+    // allocate temp inverse tensor
+    bool returnInverse = (indices != nullptr);
+    AscendTensor inverseTmpAt;
+    if (returnInverse || returnCounts) {
+        makeTensor(ctx, inverseTmpAt, inputAt.shape(), diopi_dtype_int64);
+    } else {
+        makeTensor(ctx, inverseTmpAt, zeroShape, diopi_dtype_int64);
+    }
+
+    // allocate temp counts tensor
+    AscendTensor countsTmpAt;
+    makeTensor(ctx, countsTmpAt, returnCounts ? flatShape : zeroShape, diopi_dtype_int64);
+
+    // call aclnnUnique2. The holder returned by convertParams is kept in a named variable, as the
+    // DIOPI_ASCEND_CALL_ACLNN macros do, so it outlives the aclGetViewShape queries on its params below.
+    auto paramsHolder = ::impl::ascend::aclnn_adaptor::convertParams(input, sorted, returnInverse, returnCounts, outTmpAt, inverseTmpAt, countsTmpAt);
+    auto params = paramsHolder.params();
+    DIOPI_ASECND_CALL_ACLNN_TYPE_SYNC(aclnnUnique2, ctx, params);
+
+    // fill out tensor; out tensor is the 5th tensor in aclnnUnique2, index = 4
+    constexpr int64_t outputTensorIndex = 4;
+    std::vector<int64_t> outShape;
+    int ret = getViewShape(std::get<outputTensorIndex>(params), outShape);
+    ASCEND_CHECK_ABORT(ret == 0, "get out aclGetViewShape failed");
+    AscendTensor outReshapeAt = reshape(ctx, outTmpAt, outShape);
+    *out = const_cast<diopiTensorHandle_t>(outReshapeAt.tensorHandle());
+
+    // fill indices tensor
+    if (returnInverse) {
+        // indices is a by-value handle, so assigning to it never reaches the caller; copy into the caller's tensor instead.
+        // NOTE(review): assumes the caller allocated indices with the input's shape - confirm against the diopiUnique contract.
+        AscendTensor indicesAt(indices);
+        DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceCopy, ctx, indicesAt, inverseTmpAt);
+    }
+
+    // fill counts tensor; counts tensor is the 7th tensor in aclnnUnique2, index = 6
+    if (returnCounts) {
+        constexpr int64_t countsTensorIndex = 6;
+        std::vector<int64_t> countsShape;
+        int ret2 = getViewShape(std::get<countsTensorIndex>(params), countsShape);
+        ASCEND_CHECK_ABORT(ret2 == 0, "get count aclGetViewShape failed");
+        AscendTensor countsReshapeAt = reshape(ctx, countsTmpAt, countsShape);
+        *counts = const_cast<diopiTensorHandle_t>(countsReshapeAt.tensorHandle());
+    }
+
+    return diopiSuccess;
+}
+
+}  // namespace ascend
+}  // namespace impl
diff --git a/impl/ascend_npu/CMakeLists.txt b/impl/ascend_npu/CMakeLists.txt
index 44da6db15..ba7701105 100755
--- a/impl/ascend_npu/CMakeLists.txt
+++ b/impl/ascend_npu/CMakeLists.txt
@@ -192,6 +192,7 @@ set(OLD_IMPL_SRC
     ${OLD_IMPL_DIR}/functions/max_pool2d.cpp
     ${OLD_IMPL_DIR}/functions/equal.cpp
     ${OLD_IMPL_DIR}/functions/masked_select.cpp
+    ${OLD_IMPL_DIR}/functions/unique.cpp
     ${OLD_IMPL_DIR}/functions_mmcv/roi_align_npu.cpp
     ${OLD_IMPL_DIR}/functions_ext/rms_norm.cpp
     ${OLD_IMPL_DIR}/functions_ext/rotary_embedding.cpp
diff --git a/impl/ascend_npu/ascend_config.yaml b/impl/ascend_npu/ascend_config.yaml
index 4adf5db16..9dbdec336 100755
--- a/impl/ascend_npu/ascend_config.yaml
+++ b/impl/ascend_npu/ascend_config.yaml
@@ -253,6 +253,7 @@ ascend:
 - diopiTriu
 - diopiTriuInp
 - diopiUniformInp
+- diopiUnique
 - diopiUpsampleLinear
 - diopiUpsampleLinearBackward
 - diopiUpsampleNearest