Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ set(VLLM_ASCEND_CUSTOM_OP
)

set(VLLM_ASCEND_CUSTOM_OP_EXCLUDE
${KERNEL_FILES}/bgmv_expand.cpp
${KERNEL_FILES}/bgmv_shrink.cpp
${KERNEL_FILES}/sgmv_expand.cpp
${KERNEL_FILES}/sgmv_shrink.cpp
${CMAKE_CURRENT_SOURCE_DIR}/csrc/batch_matmul_transpose/op_kernel/batch_matmul_transpose_kernel.cpp
)

Expand Down
6 changes: 3 additions & 3 deletions csrc/kernels/bgmv_expand.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ class BGMVExpand {

// declare all dtype kernel
BGMV_EXPAND_TYPE_DECLARE(half)
#if (__CCE_AICORE__ >= 220)
#if !defined(__CCE_AICORE__) || (__CCE_AICORE__ >= 220)
BGMV_EXPAND_TYPE_DECLARE(bfloat16_t)
#endif

Expand All @@ -356,8 +356,8 @@ extern void bgmv_expand_impl(AscendType type, void* stream, void* x, void* weigh
bgmv_expand_half<<<blockDim, nullptr, stream>>>(x, weight, indices, indicesSize, yIn, yOut, batchSize, numTokensPerCore,
maxLoRARank, outputHiddenDim, sliceOffset, outputFullDim);
} else if (type == AscendType::BF16) {
#if (__CCE_AICORE__ >= 220)
bgmv_expand_bfloat16_t<<<blockDim, nullptr, stream>>>(x, weight, indices, indicesSize, yIn, yOut, batchSize,
#if !defined(__CCE_AICORE__) || (__CCE_AICORE__ >= 220)
bgmv_expand_bfloat16_t<<<blockDim, nullptr, stream>>>(x, weight, indices, indicesSize, yIn, yOut, batchSize,
numTokensPerCore, maxLoRARank, outputHiddenDim,
sliceOffset, outputFullDim);
#endif
Expand Down
6 changes: 3 additions & 3 deletions csrc/kernels/bgmv_shrink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ class BGMVShrink {

// declare all dtype kernel
BGMV_SHRINK_TYPE_DECLARE(half)
#if (__CCE_AICORE__ >= 220)
#if !defined(__CCE_AICORE__) || (__CCE_AICORE__ >= 220)
BGMV_SHRINK_TYPE_DECLARE(bfloat16_t)
#endif

Expand All @@ -240,8 +240,8 @@ extern void bgmv_shrink_impl(AscendType type, void* stream, void* x, void* weigh
bgmv_shrink_half<<<blockDim, nullptr, stream>>>(x, weight, indices, indicesSize, y, batchSize, numTokensPerCore,
inputHiddenDim, maxLoRARank, scale);
} else if (type == AscendType::BF16) {
#if (__CCE_AICORE__ >= 220)
bgmv_shrink_bfloat16_t<<<blockDim, nullptr, stream>>>(x, weight, indices, indicesSize, y, batchSize, numTokensPerCore,
#if !defined(__CCE_AICORE__) || (__CCE_AICORE__ >= 220)
bgmv_shrink_bfloat16_t<<<blockDim, nullptr, stream>>>(x, weight, indices, indicesSize, y, batchSize, numTokensPerCore,
inputHiddenDim, maxLoRARank, scale);
#endif
} else {
Expand Down
6 changes: 3 additions & 3 deletions csrc/kernels/sgmv_expand.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ class SGMVExpand {

// declare all dtype kernel
SGMV_EXPAND_TYPE_DECLARE(half)
#if (__CCE_AICORE__ >= 220)
#if !defined(__CCE_AICORE__) || (__CCE_AICORE__ >= 220)
SGMV_EXPAND_TYPE_DECLARE(bfloat16_t)
#endif

Expand All @@ -375,8 +375,8 @@ extern void sgmv_expand_impl(AscendType type, void* stream, void* x, void* weigh
numTokensPerCore, maxLoRARank, outputHiddenDim, sliceOffset,
outputFullDim);
} else if (type == AscendType::BF16) {
#if (__CCE_AICORE__ >= 220)
sgmv_expand_bfloat16_t<<<blockDim, nullptr, stream>>>(x, weight, loraIndices, loraIndicesSize,
#if !defined(__CCE_AICORE__) || (__CCE_AICORE__ >= 220)
sgmv_expand_bfloat16_t<<<blockDim, nullptr, stream>>>(x, weight, loraIndices, loraIndicesSize,
seqLen, seqLenSize, yIn, yOut, batchSize,
numTokensPerCore, maxLoRARank, outputHiddenDim,
sliceOffset, outputFullDim);
Expand Down
6 changes: 3 additions & 3 deletions csrc/kernels/sgmv_shrink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ class SGMVShrink {

// declare all dtype kernel
SGMV_SHRINK_TYPE_DECLARE(half)
#if (__CCE_AICORE__ >= 220)
#if !defined(__CCE_AICORE__) || (__CCE_AICORE__ >= 220)
SGMV_SHRINK_TYPE_DECLARE(bfloat16_t)
#endif

Expand All @@ -260,8 +260,8 @@ extern void sgmv_shrink_impl(AscendType type, void* stream, void* x, void* weigh
numTokensPerCore, inputHiddenDim, maxLoRARank,
scale);
} else if (type == AscendType::BF16) {
#if (__CCE_AICORE__ >= 220)
sgmv_shrink_bfloat16_t<<<blockDim, nullptr, stream>>>(x, weight, loraIndices, loraIndicesSize,
#if !defined(__CCE_AICORE__) || (__CCE_AICORE__ >= 220)
sgmv_shrink_bfloat16_t<<<blockDim, nullptr, stream>>>(x, weight, loraIndices, loraIndicesSize,
seqLen, seqLenSize,
y, batchSize,
numTokensPerCore, inputHiddenDim, maxLoRARank,
Expand Down
1 change: 1 addition & 0 deletions vllm_ascend/lora/punica_npu.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ def add_lora_embedding(self,
# Embedding layer only need expand op
expand_fun: Callable = (self._expand_prefill
if self.is_prefill else self._expand_decode)
x = x.to(torch.float32)
expand_fun(y, x, lora_b_stacked, add_inputs)

def add_lora_linear(self,
Expand Down
Loading