Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Ascend] use aclnnUnique2 impl unique #1314

Merged
merged 8 commits into from
Jul 30, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 6 additions & 14 deletions impl/ascend/device_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -795,25 +795,17 @@

'unique': dict(
name=['unique'],
tensor_para=dict(
args=[
{
"ins": ['input'],
"dtype": [Skip(np.int64),Skip(np.float32),Skip(np.float64),Skip(np.float16),Skip(np.int16),Skip(np.int32),Skip(np.uint8),Skip(np.int8),Skip(np.bool_),],
},
]
para=dict(
# aclnnUnique2 only supports dim=None, so all explicit dim values are skipped
dim=[Skip(-2), Skip(-1), Skip(0), Skip(1), Skip(2)],
),
),

'unique_same_value': dict(
name=['unique'],
tensor_para=dict(
args=[
{
"ins": ['input'],
"dtype": [Skip(np.int64),Skip(np.float32),Skip(np.float64),Skip(np.float16),Skip(np.int16),Skip(np.int32),Skip(np.uint8),Skip(np.int8),Skip(np.bool_),],
},
]
para=dict(
# aclnnUnique2 only supports dim=None, so all explicit dim values are skipped
dim=[Skip(-1), Skip(1)],
),
),

Expand Down
114 changes: 114 additions & 0 deletions impl/ascend/functions/unique.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/**
* @file
* @author DeepLink
* @copyright (c) 2024, DeepLink.
*/

#include <cstdint>
#include <iostream>
#include <vector>

#include "../aclnn/adaptor.hpp"

namespace impl {
namespace ascend {

/**
 * @brief Compute the unique elements of a tensor via the Ascend aclnnUnique2 operator.
 *
 * Only the flattened case is supported: aclnnUnique2 cannot take a dimension, so a
 * non-null @p dim aborts (the dim-specified variant would require aclnnUniqueDim).
 *
 * @param ctx          DIOPI context used for tensor allocation and op dispatch.
 * @param out          [out] Receives a newly required tensor holding the unique values.
 * @param input        Input tensor to deduplicate.
 * @param dim          Must be nullptr; dimension-wise unique is not supported here.
 * @param sorted       Whether the unique values are returned sorted.
 * @param returnCounts Whether per-value occurrence counts are produced into @p counts.
 * @param indices      Optional pre-allocated tensor for the inverse mapping
 *                     (input element -> index into the unique values); may be nullptr.
 * @param counts       [out] Receives a newly required counts tensor when @p returnCounts.
 * @return diopiSuccess on success; aborts on unsupported arguments or shape-query failure.
 */
diopiError_t diopiUnique(diopiContextHandle_t ctx, diopiTensorHandle_t* out, diopiConstTensorHandle_t input, const int64_t* dim, bool sorted, bool returnCounts,
                         diopiTensorHandle_t indices, diopiTensorHandle_t* counts) {
    // aclnnUnique2 only supports dim == nullptr; aclnnUniqueDim would be needed otherwise.
    ASCEND_CHECK_ABORT(dim == nullptr, "dim is not supported in aclnnUnique2");

    // The inverse mapping is produced when the caller passed an indices tensor.
    const bool returnInverse = indices != nullptr;

    AscendTensor inputAt(input);
    const std::vector<int64_t>& inSizeVec = inputAt.shape();
    diopiSize_t inSize = {inSizeVec.data(), static_cast<int64_t>(inSizeVec.size())};
    std::vector<int64_t> numelSizeVec{inputAt.numel()};
    diopiSize_t numelSize = {numelSizeVec.data(), static_cast<int64_t>(numelSizeVec.size())};
    std::vector<int64_t> zeroSizeVec = {0};
    diopiSize_t zeroSize = {zeroSizeVec.data(), 1};

    // Allocate a temp out tensor sized for the worst case (every element unique).
    // NOTE(review): after the abort check above, dim is always nullptr, so only the
    // numelSize branch is reachable; the dim branch is kept for safety.
    diopiTensorHandle_t outTmp = nullptr;
    if (dim) {
        diopiRequireTensor(ctx, &outTmp, &inSize, nullptr, inputAt.dtype(), diopi_device);
    } else {
        diopiRequireTensor(ctx, &outTmp, &numelSize, nullptr, inputAt.dtype(), diopi_device);
    }

    // Temp inverse tensor: the op still needs a (possibly empty) tensor when unused.
    diopiTensorHandle_t inverseTmp = nullptr;
    if (returnInverse || returnCounts) {
        diopiRequireTensor(ctx, &inverseTmp, &inSize, nullptr, diopi_dtype_int64, diopi_device);
    } else {
        diopiRequireTensor(ctx, &inverseTmp, &zeroSize, nullptr, diopi_dtype_int64, diopi_device);
    }

    // Temp counts tensor: worst case one count per input element.
    diopiTensorHandle_t countsTmp = nullptr;
    if (returnCounts) {
        diopiRequireTensor(ctx, &countsTmp, &numelSize, nullptr, diopi_dtype_int64, diopi_device);
    } else {
        diopiRequireTensor(ctx, &countsTmp, &zeroSize, nullptr, diopi_dtype_int64, diopi_device);
    }

    // Run aclnnUnique2 synchronously: the true output shapes are only known after
    // the kernel finishes, and we must query them below before copying out.
    // NOTE(review): "ASECND" spelling matches the adaptor macro name — confirm against adaptor.hpp.
    auto params = ::impl::ascend::aclnn_adaptor::convertParams(input, sorted, returnInverse, returnCounts, outTmp, inverseTmp, countsTmp).params();
    DIOPI_ASECND_CALL_ACLNN_TYPE_SYNC(aclnnUnique2, ctx, params);

    // Resolve the runtime-loaded aclGetViewShape helper to read the post-run shapes.
    int64_t* viewDims = nullptr;
    uint64_t viewDimNum = 0;
    using aclGetViewShapeFunc = int (*)(const aclTensor* tensor, int64_t** viewDims, uint64_t* viewDimsNum);
    static aclGetViewShapeFunc aclGetViewShape = reinterpret_cast<aclGetViewShapeFunc>(impl::ascend::aclnn_adaptor::getOpApiFuncAddr("aclGetViewShape"));
    // out tensor is the 5th tensor in aclnnUnique2, index = 4
    constexpr int64_t outputTensorIndex = 4;
    int ret = aclGetViewShape(std::get<outputTensorIndex>(params), &viewDims, &viewDimNum);
    ASCEND_CHECK_ABORT(ret == 0, "get out aclGetViewShape failed");

    // Require the real out tensor with the true shape and copy the valid prefix of outTmp.
    diopiSize_t outShape{viewDims, static_cast<int64_t>(viewDimNum)};
    diopiRequireTensor(ctx, out, &outShape, nullptr, inputAt.dtype(), diopi_device);
    AscendTensor outAt(*out);
    AscendTensor outTmpAt(outTmp);
    outTmpAt.view({outShape.data, outShape.data + outShape.len});
    DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceCopy, ctx, outAt, outTmpAt);

    // Copy the inverse mapping into the caller-provided indices tensor.
    if (returnInverse) {
        AscendTensor inverseTmpAt(inverseTmp);
        // Renamed from the original shadowing `inSize` to avoid hiding the outer variable.
        diopiSize_t inverseSize = {inverseTmpAt.shape().data(), static_cast<int64_t>(inverseTmpAt.shape().size())};
        AscendTensor indicesTmpAt(indices);
        if (indicesTmpAt.shape() != inverseTmpAt.shape()) {
            // NOTE(review): `indices` is a by-value handle, so re-requiring a tensor here only
            // rebinds the local copy — the caller never sees the replacement. Confirm callers
            // always pre-allocate indices with the input's shape.
            diopiRequireTensor(ctx, &indices, &inverseSize, nullptr, diopi_dtype_int64, diopi_device);
        }
        AscendTensor indicesAt(indices);
        DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceCopy, ctx, indicesAt, inverseTmpAt);
    }

    // Copy the occurrence counts, again using the post-run view shape.
    if (returnCounts) {
        AscendTensor countsTmpAt(countsTmp);
        // counts tensor is the 7th tensor in aclnnUnique2, index = 6
        constexpr int64_t countsTensorIndex = 6;
        // NOTE(review): this overwrites viewDims from the out-shape query above; if
        // aclGetViewShape allocates a fresh buffer per call, the first buffer leaks —
        // confirm the helper's ownership contract.
        int ret2 = aclGetViewShape(std::get<countsTensorIndex>(params), &viewDims, &viewDimNum);
        ASCEND_CHECK_ABORT(ret2 == 0, "get count aclGetViewShape failed");
        diopiSize_t countShape{viewDims, static_cast<int64_t>(viewDimNum)};
        diopiRequireTensor(ctx, counts, &countShape, nullptr, countsTmpAt.dtype(), diopi_device);
        AscendTensor countsAt(*counts);
        countsTmpAt.view({countShape.data, countShape.data + countShape.len});
        DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceCopy, ctx, countsAt, countsTmpAt);
    }

    // Release the shape buffer returned by aclGetViewShape.
    // NOTE(review): if the buffer is allocated with new[], this must be delete[] —
    // confirm against the op-api implementation before changing.
    if (viewDims) {
        delete viewDims;
        viewDims = nullptr;
    }

    return diopiSuccess;
}

}  // namespace ascend
}  // namespace impl
1 change: 1 addition & 0 deletions impl/ascend_npu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ set(OLD_IMPL_SRC
${OLD_IMPL_DIR}/functions/max_pool2d.cpp
${OLD_IMPL_DIR}/functions/equal.cpp
${OLD_IMPL_DIR}/functions/masked_select.cpp
${OLD_IMPL_DIR}/functions/unique.cpp
${OLD_IMPL_DIR}/functions_mmcv/roi_align_npu.cpp
${OLD_IMPL_DIR}/functions_ext/rms_norm.cpp
${OLD_IMPL_DIR}/functions_ext/rotary_embedding.cpp
Expand Down
1 change: 1 addition & 0 deletions impl/ascend_npu/ascend_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ ascend:
- diopiTriu
- diopiTriuInp
- diopiUniformInp
- diopiUnique
- diopiUpsampleLinear
- diopiUpsampleLinearBackward
- diopiUpsampleNearest
Expand Down
Loading