[Ascend] use aclnnUnique2 impl unique (#1314)

--------- Co-authored-by: hellozmz <[email protected]>
DeepLink-org · Jul 30, 2024 · 6246289 · 6246289
1 parent aefa5b7
commit 6246289
Show file tree

Hide file tree

Showing 7 changed files with 125 additions and 17 deletions.
diff --git a/impl/ascend/aclnn/adaptor.hpp b/impl/ascend/aclnn/adaptor.hpp
@@ -367,7 +367,7 @@ void callAclnnImpl(diopiContextHandle_t ctx, const std::tuple<Args...>& tuple) {
         static constexpr const char kWorkspaceApiName[] = #api "GetWorkspaceSize";                                \
         auto convertedParams = ::impl::ascend::aclnn_adaptor::convertParams(__VA_ARGS__);                         \
         ::impl::ascend::aclnn_adaptor::callAclnnImpl<kApiName, kWorkspaceApiName>(ctx, convertedParams.params()); \
-    } while (false)
+    } while (false);
 
 #define DIOPI_ASECND_CALL_ACLNN_TYPE_SYNC(api, ctx, ...)                                             \
     do {                                                                                             \
@@ -377,12 +377,12 @@ void callAclnnImpl(diopiContextHandle_t ctx, const std::tuple<Args...>& tuple) {
         diopiStreamHandle_t stream;                                                                  \
         diopiGetStream(ctx, &stream);                                                                \
         CALL_ACLRT(aclrtSynchronizeStream(reinterpret_cast<aclrtStream>(stream)));                   \
-    } while (false)
+    } while (false);
 
 #define DIOPI_ASCEND_CALL_ACLNN_SYNC(api, ctx, ...)                                       \
     do {                                                                                  \
         auto convertedParams = ::impl::ascend::aclnn_adaptor::convertParams(__VA_ARGS__); \
         DIOPI_ASECND_CALL_ACLNN_TYPE_SYNC(api, ctx, convertedParams.params())             \
-    } while (false)
+    } while (false);
 
 #endif  // IMPL_ASCEND_ACLNN_ADAPTOR_HPP_
diff --git a/impl/ascend/common/utils.cpp b/impl/ascend/common/utils.cpp
@@ -7,6 +7,7 @@
 #include "utils.hpp"
 
 #include <array>
+#include <cstddef>
 #include <cstdint>
 #include <functional>
 #include <numeric>
@@ -15,6 +16,7 @@
 #include <typeinfo>
 #include <utility>
 
+#include "../aclnn/adaptor.hpp"
 #include "../ascend_tensor.hpp"
 #include "acloprunner.hpp"
 
@@ -186,6 +188,22 @@ diopiError_t reshape(diopiContextHandle_t ctx, const AscendTensor& src, AscendTe
     return diopiSuccess;
 }
 
+/**
+ * @brief Return a tensor that carries the data of `src` with the requested `shape`.
+ *
+ * When `src` already has `shape`, `src` is returned as is. Otherwise a new tensor with
+ * `shape` and the dtype of `src` is made, and it is filled through aclnnInplaceCopy from
+ * `src` viewed with `shape`.
+ *
+ * @param ctx   DIOPI context handed to makeTensor and to the aclnn call.
+ * @param src   Source tensor; checked with ASCEND_CHECK_ABORT to be defined.
+ * @param shape Requested shape.
+ * @return `src` itself, or a tensor wrapping the handle of the newly filled tensor.
+ */
+AscendTensor reshape(diopiContextHandle_t ctx, const AscendTensor& src, const std::vector<int64_t>& shape) {
+    ASCEND_CHECK_ABORT(src.defined(), "input tensor is nullptr.");
+
+    const bool shapeUnchanged = (src.shape() == shape);
+    if (!shapeUnchanged) {
+        // Shapes differ: build the destination first, then copy the data over.
+        // view() is called on a local copy, so the const `src` object is not touched.
+        AscendTensor reshaped;
+        AscendTensor viewSource(src);
+        makeTensor(ctx, reshaped, shape, viewSource.dtype());
+        DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceCopy, ctx, reshaped, viewSource.view(shape));
+
+        return AscendTensor(reshaped.tensorHandle());
+    }
+
+    // Nothing to do: the caller gets the source tensor back.
+    return src;
+}
+
 diopiError_t aclAsStridedCore(diopiContextHandle_t ctx, const AscendTensor& src, AscendTensor& dst) {
     diopiTensorHandle_t targetObj = const_cast<diopiTensorHandle_t>(static_cast<diopiConstTensorHandle_t>(dst));
     AclOpRunner<4, 1>("AsStrided", ctx)

diff --git a/impl/ascend/common/utils.hpp b/impl/ascend/common/utils.hpp
@@ -89,6 +89,8 @@ diopiError_t makeTensorFromScalar(diopiContextHandle_t ctx, AscendTensor& dst, c
 
 diopiError_t reshape(diopiContextHandle_t ctx, const AscendTensor& src, AscendTensor& dst, const std::vector<int64_t>& shape);
 
+AscendTensor reshape(diopiContextHandle_t ctx, const AscendTensor& src, const std::vector<int64_t>& shape);
+
 diopiError_t contiguous(diopiContextHandle_t ctx, const AscendTensor& src, AscendTensor& dst, diopiMemoryFormat_t format = diopiMemoryFormat_t::Contiguous);
 
 diopiError_t castTensor(diopiContextHandle_t ctx, const AscendTensor& src, AscendTensor& dst);

diff --git a/impl/ascend/device_configs.py b/impl/ascend/device_configs.py
@@ -795,25 +795,17 @@
 
     'unique': dict(
         name=['unique'],
-        tensor_para=dict(
-            args=[
-                {
-                    "ins": ['input'],
-                    "dtype": [Skip(np.int64),Skip(np.float32),Skip(np.float64),Skip(np.float16),Skip(np.int16),Skip(np.int32),Skip(np.uint8),Skip(np.int8),Skip(np.bool_),],
-                },
-            ]
+        para=dict(
+            # aclnnUnique2 only supports dim=None, so every case with an explicit dim is skipped
+            dim=[Skip(-2), Skip(-1), Skip(0), Skip(1), Skip(2)],
         ),
     ),
 
     'unique_same_value': dict(
         name=['unique'],
-        tensor_para=dict(
-            args=[
-                {
-                    "ins": ['input'],
-                    "dtype": [Skip(np.int64),Skip(np.float32),Skip(np.float64),Skip(np.float16),Skip(np.int16),Skip(np.int32),Skip(np.uint8),Skip(np.int8),Skip(np.bool_),],
-                },
-            ]
+        para=dict(
+            # aclnnUnique2 only supports dim=None, so every case with an explicit dim is skipped
+            dim=[Skip(-1), Skip(1)],
         ),
     ),
 

diff --git a/impl/ascend/functions/unique.cpp b/impl/ascend/functions/unique.cpp
@@ -0,0 +1,94 @@
+/**
+ * @file
+ * @author DeepLink
+ * @copyright  (c) 2024, DeepLink.
+ */
+
+#include <cstdint>
+#include <vector>
+
+#include "../aclnn/adaptor.hpp"
+#include "../common/utils.hpp"
+
+namespace impl {
+namespace ascend {
+
+/**
+ * @brief Unique over the whole input tensor, implemented with aclnnUnique2.
+ *
+ * @param ctx          DIOPI context used for tensor creation and for the aclnn calls.
+ * @param out          Receives the handle of the tensor holding the unique values.
+ * @param input        Tensor whose unique values are computed.
+ * @param dim          Must be nullptr; aclnnUnique2 has no per-dimension mode (aclnnUniqueDim would be needed).
+ * @param sorted       Forwarded unchanged to aclnnUnique2.
+ * @param returnCounts When true, `*counts` receives the handle of the counts tensor.
+ * @param indices      Optional caller-provided tensor; when non-null the inverse indices are copied into it.
+ * @param counts       Written only when `returnCounts` is true.
+ * @return diopiSuccess; failed checks are reported through ASCEND_CHECK_ABORT.
+ */
+diopiError_t diopiUnique(diopiContextHandle_t ctx, diopiTensorHandle_t* out, diopiConstTensorHandle_t input, const int64_t* dim, bool sorted, bool returnCounts,
+                         diopiTensorHandle_t indices, diopiTensorHandle_t* counts) {
+    // aclnnUnique2 only supports dim == nullptr. If dim is not nullptr, aclnnUniqueDim should be used.
+    ASCEND_CHECK_ABORT(dim == nullptr, "dim is not supported in aclnnUnique2");
+    ASCEND_CHECK_ABORT(out != nullptr, "out is nullptr.");
+    ASCEND_CHECK_ABORT(!returnCounts || counts != nullptr, "counts is nullptr while returnCounts is true.");
+
+    bool returnInverse = (indices != nullptr);
+    const std::vector<int64_t> zeroShape = {0};
+    AscendTensor inputAt(input);
+
+    // The number of unique values is data dependent, so the temporary out tensor is sized
+    // for the worst case (one entry per input element) and shrunk after the kernel has run.
+    AscendTensor outTmpAt;
+    makeTensor(ctx, outTmpAt, {inputAt.numel()}, inputAt.dtype());
+
+    // allocate temp inverse tensor (input-shaped whenever inverse or counts are requested)
+    AscendTensor inverseTmpAt;
+    if (returnInverse || returnCounts) {
+        makeTensor(ctx, inverseTmpAt, inputAt.shape(), diopi_dtype_int64);
+    } else {
+        makeTensor(ctx, inverseTmpAt, zeroShape, diopi_dtype_int64);
+    }
+
+    // allocate temp counts tensor (worst-case size, like the out tensor)
+    AscendTensor countsTmpAt;
+    if (returnCounts) {
+        makeTensor(ctx, countsTmpAt, {inputAt.numel()}, diopi_dtype_int64);
+    } else {
+        makeTensor(ctx, countsTmpAt, zeroShape, diopi_dtype_int64);
+    }
+
+    // call aclnnUnique2; the converted params are kept so the result shapes can be queried afterwards
+    auto params = ::impl::ascend::aclnn_adaptor::convertParams(input, sorted, returnInverse, returnCounts, outTmpAt, inverseTmpAt, countsTmpAt).params();
+    DIOPI_ASECND_CALL_ACLNN_TYPE_SYNC(aclnnUnique2, ctx, params);
+
+    // aclGetViewShape is resolved at runtime from the op api library, so make sure it was found.
+    using aclGetViewShapeFunc = int (*)(const aclTensor* tensor, int64_t** viewDims, uint64_t* viewDimsNum);
+    static aclGetViewShapeFunc aclGetViewShape = reinterpret_cast<aclGetViewShapeFunc>(impl::ascend::aclnn_adaptor::getOpApiFuncAddr("aclGetViewShape"));
+    ASCEND_CHECK_ABORT(aclGetViewShape != nullptr, "can not find aclGetViewShape");
+
+    // get true outShape by aclGetViewShape; out tensor is the 5th tensor in aclnnUnique2, index = 4
+    constexpr int64_t outputTensorIndex = 4;
+    int64_t* outViewDims = nullptr;
+    uint64_t outViewDimNum = 0;
+    int ret = aclGetViewShape(std::get<outputTensorIndex>(params), &outViewDims, &outViewDimNum);
+    ASCEND_CHECK_ABORT(ret == 0, "get out aclGetViewShape failed");
+    const std::vector<int64_t> outShape(outViewDims, outViewDims + outViewDimNum);
+    // The dims array is handed over to the caller; it is an array, so it needs delete[]
+    // and must be released before the next query to avoid leaking it.
+    delete[] outViewDims;
+    outViewDims = nullptr;
+
+    // fill out tensor
+    AscendTensor outReshapeAt = reshape(ctx, outTmpAt, outShape);
+    *out = const_cast<diopiTensorHandle_t>(outReshapeAt.tensorHandle());
+
+    // fill indices tensor
+    if (returnInverse) {
+        // `indices` is a handle passed by value: rebinding it would never reach the caller,
+        // so the inverse result is copied into the caller's tensor instead.
+        AscendTensor indicesAt(indices);
+        DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceCopy, ctx, indicesAt, inverseTmpAt);
+    }
+
+    // fill counts tensor
+    if (returnCounts) {
+        // get counts tensor shape, counts tensor is the 7th tensor in aclnnUnique2, index = 6
+        constexpr int64_t countsTensorIndex = 6;
+        int64_t* countsViewDims = nullptr;
+        uint64_t countsViewDimNum = 0;
+        int ret2 = aclGetViewShape(std::get<countsTensorIndex>(params), &countsViewDims, &countsViewDimNum);
+        ASCEND_CHECK_ABORT(ret2 == 0, "get count aclGetViewShape failed");
+        const std::vector<int64_t> countsShape(countsViewDims, countsViewDims + countsViewDimNum);
+        delete[] countsViewDims;
+        countsViewDims = nullptr;
+
+        AscendTensor countsReshapeAt = reshape(ctx, countsTmpAt, countsShape);
+        *counts = const_cast<diopiTensorHandle_t>(countsReshapeAt.tensorHandle());
+    }
+
+    return diopiSuccess;
+}
+
+}  // namespace ascend
+}  // namespace impl
diff --git a/impl/ascend_npu/CMakeLists.txt b/impl/ascend_npu/CMakeLists.txt
@@ -192,6 +192,7 @@ set(OLD_IMPL_SRC
     ${OLD_IMPL_DIR}/functions/max_pool2d.cpp
     ${OLD_IMPL_DIR}/functions/equal.cpp
     ${OLD_IMPL_DIR}/functions/masked_select.cpp
+    ${OLD_IMPL_DIR}/functions/unique.cpp
     ${OLD_IMPL_DIR}/functions_mmcv/roi_align_npu.cpp
     ${OLD_IMPL_DIR}/functions_ext/rms_norm.cpp
     ${OLD_IMPL_DIR}/functions_ext/rotary_embedding.cpp

diff --git a/impl/ascend_npu/ascend_config.yaml b/impl/ascend_npu/ascend_config.yaml
@@ -253,6 +253,7 @@ ascend:
 - diopiTriu
 - diopiTriuInp
 - diopiUniformInp
+- diopiUnique
 - diopiUpsampleLinear
 - diopiUpsampleLinearBackward
 - diopiUpsampleNearest