Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion csrc/build_aclnn.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ elif [[ "$SOC_VERSION" =~ ^ascend910b ]]; then
ABSOLUTE_CATLASS_PATH=$(cd "${CATLASS_PATH}" && pwd)
export CPATH=${ABSOLUTE_CATLASS_PATH}:${CPATH}

CUSTOM_OPS="grouped_matmul_swiglu_quant_weight_nz_tensor_list;lightning_indexer;sparse_flash_attention;matmul_allreduce_add_rmsnorm"
CUSTOM_OPS="grouped_matmul_swiglu_quant_weight_nz_tensor_list;lightning_indexer;sparse_flash_attention;matmul_allreduce_add_rmsnorm;moe_init_routing_custom"
SOC_ARG="ascend910b"
elif [[ "$SOC_VERSION" =~ ^ascend910_93 ]]; then
# ASCEND910C (A3) series
Expand Down Expand Up @@ -69,6 +69,7 @@ elif [[ "$SOC_VERSION" =~ ^ascend910_93 ]]; then
"moe_dispatch_normal"
"dispatch_layout"
"notify_dispatch"
"moe_init_routing_custom"
)
CUSTOM_OPS=$(IFS=';'; echo "${CUSTOM_OPS_ARRAY[*]}")
SOC_ARG="ascend910_93"
Expand Down
55 changes: 55 additions & 0 deletions csrc/moe_init_routing_custom/op_host/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Copyright (c) 2025 Huawei Technologies Co., Ltd.
# This file is a part of the CANN Open Software.
# Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
# Please refer to the License for details. You may not use this file except in compliance with the License.
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
# See LICENSE in the root of the software repository for the full text of the License.
# ======================================================================================================================

# Host-side build rules for the MoeInitRoutingCustom operator.

add_ops_compile_options(
    OP_NAME MoeInitRoutingCustom
    OPTIONS --cce-auto-sync=on
            -Wno-deprecated-declarations
            -Werror
)

# Operator definition registered with the host aclnnExc library.
target_sources(op_host_aclnnExc PRIVATE
    moe_init_routing_custom_def.cpp
)

# Level-0 launcher and level-2 aclnn entry points.
target_sources(opapi PRIVATE
    moe_init_routing_custom.cpp
    aclnn_moe_init_routing_custom.cpp
)

if (NOT BUILD_OPEN_PROJECT)
    target_sources(aclnn_ops_train PRIVATE
        moe_init_routing_custom.cpp
        aclnn_moe_init_routing_custom.cpp
    )

    target_sources(aclnn_ops_infer PRIVATE
        moe_init_routing_custom.cpp
        aclnn_moe_init_routing_custom.cpp
    )
endif ()

# Tiling implementation.
target_sources(optiling PRIVATE
    moe_init_routing_custom_tiling_base.cpp
    moe_init_routing_custom_tiling.cpp
)

target_include_directories(optiling PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}
)

# InferShape implementation.
target_sources(opsproto PRIVATE
    moe_init_routing_custom_infershape.cpp
)

# Install the public aclnn header. The target is a single known file, so it is
# named directly instead of via file(GLOB), which is not re-evaluated at build
# time and previously used a variable name (_GMM_Aclnn_header) copy-pasted from
# the GroupedMatmul operator.
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/aclnn_moe_init_routing_custom.h
    DESTINATION ${ACLNN_INC_INSTALL_DIR} OPTIONAL
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
/**
 * This program is free software, you can redistribute it and/or modify.
 * Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This file is a part of the CANN Open Software.
* Licensed under CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/

#include <algorithm>
#include <tuple>
#include <cstddef>
#include "opdev/make_op_executor.h"
#include "aclnn_kernels/contiguous.h"
#include "opdev/tensor_view_utils.h"
#include "aclnn_kernels/common/op_error_check.h"
#include "opdev/op_log.h"
#include "aclnn_kernels/cast.h"
#include "opdev/common_types.h"
#include "moe_init_routing_custom.h"
#include "aclnn_moe_init_routing_custom.h"

using namespace op;

#ifdef __cplusplus
extern "C" {
#endif

namespace {
// Dimension-count constants. NOTE(review): unreferenced anywhere in this
// translation unit — presumably intended for shape validation; wire them into
// checks or remove them. TODO confirm with the author.
static const int64_t MOE_DIM_2 = 2;
static const int64_t MOE_DIM_1 = 1;
}

// Per-tensor dtype allowlists for the operator's inputs and outputs.
// NOTE(review): none of these lists is consulted in this file — no
// CheckDtypeValid-style validation runs before launch. Confirm whether dtype
// checking was intended in aclnnMoeInitRoutingCustomGetWorkspaceSize.
static const std::initializer_list<DataType> DTYPE_SUPPORT_LIST_X= {DataType::DT_FLOAT16, DataType::DT_BF16, DataType::DT_FLOAT, DataType::DT_INT8};
static const std::initializer_list<DataType> DTYPE_SUPPORT_LIST_EXPERT_IDX = {DataType::DT_INT32};
static const std::initializer_list<DataType> DTYPE_SUPPORT_LIST_SCALE = {DataType::DT_FLOAT};
static const std::initializer_list<DataType> DTYPE_SUPPORT_LIST_OFFSET= {DataType::DT_FLOAT};
static const std::initializer_list<DataType> DTYPE_SUPPORT_LIST_EXPANDED_X_OUT = {DataType::DT_FLOAT16, DataType::DT_BF16, DataType::DT_FLOAT, DataType::DT_INT8};
static const std::initializer_list<DataType> DTYPE_SUPPORT_LIST_EXPANDED_ROW_IDX_OUT = {DataType::DT_INT32};
static const std::initializer_list<DataType> DTYPE_SUPPORT_LIST_EXPERT_TOKENS_COUNT_OR_CUMSUMOUT = {DataType::DT_INT64};
static const std::initializer_list<DataType> DTYPE_SUPPORT_LIST_EXPANDED_SCALE_OUT = {DataType::DT_FLOAT};

// Validates that every mandatory tensor argument is non-null.
// The optional inputs (scaleOptional/offsetOptional) are deliberately not
// checked here — nullptr is a legal value for them.
// Returns false on the first null argument (OP_CHECK_NULL is expected to log
// the offending argument name — macro behavior not visible here, confirm).
static inline bool CheckNotNull(const aclTensor *x,
const aclTensor *expertIdx,
const aclTensor *expandedXOut,
const aclTensor *expandedRowIdxOut,
const aclTensor *expertTokensCountOrCumsumOut,
const aclTensor *expandedScaleOut) {
OP_CHECK_NULL(x, return false);
OP_CHECK_NULL(expertIdx, return false);
OP_CHECK_NULL(expandedXOut, return false);
OP_CHECK_NULL(expandedRowIdxOut, return false);
OP_CHECK_NULL(expertTokensCountOrCumsumOut, return false);
OP_CHECK_NULL(expandedScaleOut, return false);

return true;
}

/**
 * Phase-1 entry of the two-phase aclnn call: validates arguments, records the
 * MoeInitRoutingCustom task graph on an executor, and reports the workspace
 * size required for execution in phase 2 (aclnnMoeInitRoutingCustom).
 *
 * Mandatory: x, expertIdx and the four output tensors; scaleOptional and
 * offsetOptional may be nullptr. Attribute semantics (activeNum, dropPadMode,
 * quantMode, ...) are defined by the MoeInitRoutingCustom kernel.
 *
 * @return ACLNN_SUCCESS on success; ACLNN_ERR_PARAM_NULLPTR if a mandatory
 *         pointer is null; ACLNN_ERR_INNER_* on internal failures.
 */
aclnnStatus aclnnMoeInitRoutingCustomGetWorkspaceSize(const aclTensor *x,
const aclTensor *expertIdx,
const aclTensor *scaleOptional,
const aclTensor *offsetOptional,
int64_t activeNum,
int64_t expertCapacity,
int64_t expertNum,
int64_t dropPadMode,
int64_t expertTokensNumType,
bool expertTokensNumFlag,
int64_t quantMode,
const aclIntArray *activeExpertRangeOptional,
int64_t rowIdxType,
const aclTensor *expandedXOut,
const aclTensor *expandedRowIdxOut,
const aclTensor *expertTokensCountOrCumsumOut,
const aclTensor *expandedScaleOut,
uint64_t *workspaceSize,
aclOpExecutor **executor)
{
L2_DFX_PHASE_1(aclnnMoeInitRoutingCustom,
DFX_IN(x, expertIdx, scaleOptional, offsetOptional,
activeNum, expertCapacity, expertNum, dropPadMode,
expertTokensNumType, expertTokensNumFlag, quantMode, activeExpertRangeOptional, rowIdxType),
DFX_OUT(expandedXOut, expandedRowIdxOut, expertTokensCountOrCumsumOut, expandedScaleOut));

// Both out-parameters are written unconditionally below; reject null early
// instead of dereferencing them.
CHECK_RET(workspaceSize != nullptr && executor != nullptr, ACLNN_ERR_PARAM_NULLPTR);

CHECK_RET(CheckNotNull(x, expertIdx, expandedXOut, expandedRowIdxOut,
expertTokensCountOrCumsumOut, expandedScaleOut), ACLNN_ERR_PARAM_NULLPTR);

auto uniqueExecutor = CREATE_EXECUTOR();
CHECK_RET(uniqueExecutor.get() != nullptr, ACLNN_ERR_INNER_CREATE_EXECUTOR);

// Kernels require contiguous memory; insert copies for strided views.
auto xContiguous = l0op::Contiguous(x, uniqueExecutor.get());
CHECK_RET(xContiguous != nullptr, ACLNN_ERR_INNER_CREATE_EXECUTOR);
auto expertIdxContiguous = l0op::Contiguous(expertIdx, uniqueExecutor.get());
CHECK_RET(expertIdxContiguous != nullptr, ACLNN_ERR_INNER_CREATE_EXECUTOR);

// Optional inputs stay nullptr when absent; the level-0 op accepts that.
const aclTensor* scaleContiguous = nullptr;
if (scaleOptional != nullptr) {
scaleContiguous = l0op::Contiguous(scaleOptional, uniqueExecutor.get());
CHECK_RET(scaleContiguous != nullptr, ACLNN_ERR_INNER_CREATE_EXECUTOR);
}

const aclTensor* offsetContiguous = nullptr;
if (offsetOptional != nullptr) {
offsetContiguous = l0op::Contiguous(offsetOptional, uniqueExecutor.get());
CHECK_RET(offsetContiguous != nullptr, ACLNN_ERR_INNER_CREATE_EXECUTOR);
}

// Record the level-0 op; it allocates the four internal result tensors
// (the caller's output tensors act as shape/dtype templates).
auto [expandedXRes, expandedRowIdxRes, expertTokensRes, expandedScaleRes] =
l0op::MoeInitRoutingCustom(xContiguous, expertIdxContiguous, scaleContiguous, offsetContiguous,
activeNum, expertCapacity, expertNum, dropPadMode, expertTokensNumType, expertTokensNumFlag,
quantMode, activeExpertRangeOptional, rowIdxType, expandedXOut, expandedRowIdxOut,
expertTokensCountOrCumsumOut, expandedScaleOut, uniqueExecutor.get());
CHECK_RET(expandedXRes != nullptr && expandedRowIdxRes != nullptr &&
expertTokensRes != nullptr && expandedScaleRes != nullptr, ACLNN_ERR_INNER_NULLPTR);

// Copy internal results back into the caller-provided output views.
CHECK_RET(l0op::ViewCopy(expandedXRes, expandedXOut, uniqueExecutor.get()) != nullptr, ACLNN_ERR_INNER_NULLPTR);
CHECK_RET(l0op::ViewCopy(expandedRowIdxRes, expandedRowIdxOut, uniqueExecutor.get()) != nullptr, ACLNN_ERR_INNER_NULLPTR);
CHECK_RET(l0op::ViewCopy(expertTokensRes, expertTokensCountOrCumsumOut, uniqueExecutor.get()) != nullptr, ACLNN_ERR_INNER_NULLPTR);
CHECK_RET(l0op::ViewCopy(expandedScaleRes, expandedScaleOut, uniqueExecutor.get()) != nullptr, ACLNN_ERR_INNER_NULLPTR);

*workspaceSize = uniqueExecutor->GetWorkspaceSize();
uniqueExecutor.ReleaseTo(executor);
return ACLNN_SUCCESS;
}
/**
 * Phase-2 entry of the two-phase aclnn call: executes the task graph recorded
 * by aclnnMoeInitRoutingCustomGetWorkspaceSize.
 *
 * @param workspace     Device buffer of at least the size reported in phase 1.
 * @param workspaceSize Size of `workspace` in bytes (the phase-1 value).
 * @param executor      Executor handed back via ReleaseTo() in phase 1.
 * @param stream        Runtime stream the kernels are launched on.
 * @return The status of CommonOpExecutorRun (ACLNN_SUCCESS on success).
 */
aclnnStatus aclnnMoeInitRoutingCustom(void* workspace, uint64_t workspaceSize, aclOpExecutor* executor,
aclrtStream stream)
{
L2_DFX_PHASE_2(aclnnMoeInitRoutingCustom);
return CommonOpExecutorRun(workspace, workspaceSize, executor, stream);
}

#ifdef __cplusplus
}
#endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/**
 * This program is free software, you can redistribute it and/or modify.
 * Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This file is a part of the CANN Open Software.
* Licensed under CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/

#ifndef OP_API_INC_MOE_INIT_ROUTING_CUSTOM_H_
#define OP_API_INC_MOE_INIT_ROUTING_CUSTOM_H_

#include "aclnn/aclnn_base.h"

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Phase 1 of the two-phase aclnn protocol for MoeInitRoutingCustom:
 * validates arguments, records the task graph, and reports the device
 * workspace size needed by the phase-2 call.
 *
 * x and expertIdx are the mandatory inputs; scaleOptional and offsetOptional
 * may be nullptr. The four *Out tensors are mandatory outputs. The int64_t/
 * bool attributes configure the kernel (semantics defined by the operator).
 *
 * @param workspaceSize [out] Required workspace size in bytes; must be non-null.
 * @param executor      [out] Executor to pass to aclnnMoeInitRoutingCustom; must be non-null.
 * @return ACLNN_SUCCESS on success, an ACLNN_ERR_* code otherwise.
 */
__attribute__((visibility("default"))) aclnnStatus aclnnMoeInitRoutingCustomGetWorkspaceSize(const aclTensor *x,
const aclTensor *expertIdx,
const aclTensor *scaleOptional,
const aclTensor *offsetOptional,
int64_t activeNum,
int64_t expertCapacity,
int64_t expertNum,
int64_t dropPadMode,
int64_t expertTokensNumType,
bool expertTokensNumFlag,
int64_t quantMode,
const aclIntArray *activeExpertRangeOptional,
int64_t rowIdxType,
const aclTensor *expandedXOut,
const aclTensor *expandedRowIdxOut,
const aclTensor *expertTokensCountOrCumsumOut,
const aclTensor *expandedScaleOut,
uint64_t *workspaceSize,
aclOpExecutor **executor);

/**
 * Phase 2 of the two-phase aclnn protocol: launches the recorded task graph.
 *
 * @param workspace     Device buffer of at least the phase-1 reported size.
 * @param workspaceSize Size of `workspace` in bytes.
 * @param executor      Executor returned by the phase-1 call.
 * @param stream        Runtime stream to launch on.
 * @return ACLNN_SUCCESS on success, an ACLNN_ERR_* code otherwise.
 */
__attribute__((visibility("default"))) aclnnStatus aclnnMoeInitRoutingCustom(void* workspace, uint64_t workspaceSize, aclOpExecutor* executor,
aclrtStream stream);

#ifdef __cplusplus
}
#endif

#endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/**
* This program is free software, you can redistribute it and/or modify.
* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This file is a part of the CANN Open Software.
* Licensed under CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/

#include <tuple>
#include "moe_init_routing_custom.h"
#include "opdev/make_op_executor.h"
#include "opdev/op_def.h"
#include "opdev/op_dfx.h"
#include "opdev/op_executor.h"
#include "opdev/op_log.h"
#include "opdev/shape_utils.h"
#include "aclnn_kernels/common/op_error_check.h"

using namespace op;

namespace l0op {
OP_TYPE_REGISTER(MoeInitRoutingCustom);

// Level-0 launcher for MoeInitRoutingCustom.
// Allocates four result tensors on the executor — each mirrors the view shape
// and dtype of the corresponding caller-provided output tensor, in ND format —
// then records an AI Core launch with the inputs, outputs, and attributes.
// Returns (expandedX, expandedRowIdx, expertTokensCountOrCumsum, expandedScale),
// or four nullptrs if any allocation fails.
std::tuple<aclTensor*, aclTensor*, aclTensor*, aclTensor*> MoeInitRoutingCustom(const aclTensor *x, const aclTensor *expertIdx, const aclTensor *scale,
const aclTensor *offset, int64_t activeNum, int64_t expertCapacity,
int64_t expertNum, int64_t dropPadMode, int64_t expertTokensNumType,
bool expertTokensNumFlag, int64_t quantMode, const aclIntArray *activeExpertRange,
int64_t rowIdxType, const aclTensor *expandedX, const aclTensor *expandedRowIdx,
const aclTensor *expertTokensCountOrCumsum, const aclTensor *expandedScale, aclOpExecutor *executor)
{
L0_DFX(MoeInitRoutingCustom, x, expertIdx, scale, offset, activeNum, expertCapacity, expertNum, dropPadMode, expertTokensNumType, expertTokensNumFlag,
quantMode, activeExpertRange, rowIdxType, expandedX, expandedRowIdx, expertTokensCountOrCumsum, expandedScale);

// The caller's output tensors serve only as shape/dtype templates here;
// fresh tensors are allocated on the executor for the kernel to write.
auto expandedXOut = executor->AllocTensor(expandedX->GetViewShape(), expandedX->GetDataType(), Format::FORMAT_ND);
auto expandedRowIdxOut = executor->AllocTensor(expandedRowIdx->GetViewShape(), expandedRowIdx->GetDataType(), Format::FORMAT_ND);
auto expertTokensCountOrCumsumOut = executor->AllocTensor(expertTokensCountOrCumsum->GetViewShape(), expertTokensCountOrCumsum->GetDataType(), Format::FORMAT_ND);
auto expandedScaleOut = executor->AllocTensor(expandedScale->GetViewShape(), expandedScale->GetDataType(), Format::FORMAT_ND);
if (expandedXOut == nullptr || expandedRowIdxOut == nullptr || expertTokensCountOrCumsumOut == nullptr || expandedScaleOut == nullptr) {
OP_LOGE(ACLNN_ERR_INNER_NULLPTR, "alloc expandedXOut or expandedRowIdxOut or expertTokensCountOrCumsumOut or expandedScaleOut tensor failed.");
return std::tuple<aclTensor*, aclTensor*, aclTensor*, aclTensor*>(nullptr, nullptr, nullptr, nullptr);
}

// NOTE(review): the result of ADD_TO_LAUNCHER_LIST_AICORE is ignored —
// confirm whether the macro yields a status that should be checked here,
// as other ops typically do.
ADD_TO_LAUNCHER_LIST_AICORE(
MoeInitRoutingCustom, OP_INPUT(x, expertIdx, scale, offset), OP_OUTPUT(expandedXOut, expandedRowIdxOut, expertTokensCountOrCumsumOut, expandedScaleOut), OP_ATTR(activeNum, expertCapacity, expertNum, dropPadMode, expertTokensNumType, expertTokensNumFlag, quantMode, activeExpertRange, rowIdxType));
return std::tuple<aclTensor*, aclTensor*, aclTensor*, aclTensor*>(expandedXOut, expandedRowIdxOut, expertTokensCountOrCumsumOut, expandedScaleOut); // freshly allocated kernel outputs
}

} // namespace l0op
25 changes: 25 additions & 0 deletions csrc/moe_init_routing_custom/op_host/moe_init_routing_custom.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/**
 * This program is free software, you can redistribute it and/or modify.
 * Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This file is a part of the CANN Open Software.
* Licensed under CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/

#ifndef OP_API_INC_LEVEL0_MOE_INIT_ROUTING_CUSTOM_H
#define OP_API_INC_LEVEL0_MOE_INIT_ROUTING_CUSTOM_H

#include <tuple>
#include "opdev/op_executor.h"

namespace l0op {
// Level-0 launcher for the MoeInitRoutingCustom AI Core kernel.
// x/expertIdx are mandatory inputs; scale/offset may be nullptr. The
// expandedX/expandedRowIdx/expertTokensCountOrCumsum/expandedScale tensors
// supply the shapes/dtypes for the internally allocated results.
// Returns (expandedX, expandedRowIdx, expertTokensCountOrCumsum, expandedScale)
// tensors allocated on `executor`, or four nullptrs on allocation failure.
std::tuple<aclTensor*, aclTensor*, aclTensor*, aclTensor*> MoeInitRoutingCustom(const aclTensor *x, const aclTensor *expertIdx, const aclTensor *scale,
const aclTensor *offset, int64_t activeNum, int64_t expertCapacity,
int64_t expertNum, int64_t dropPadMode, int64_t expertTokensNumType,
bool expertTokensNumFlag, int64_t quantMode, const aclIntArray *activeExpertRange,
int64_t rowIdxType, const aclTensor *expandedX, const aclTensor *expandedRowIdx,
const aclTensor *expertTokensCountOrCumsum, const aclTensor *expandedScale, aclOpExecutor *executor);
} // namespace l0op
#endif // OP_API_INC_LEVEL0_MOE_INIT_ROUTING_CUSTOM_H
Loading
Loading