From e4a6e8e52248ed15a3fccdb9e8e7cf5713f57325 Mon Sep 17 00:00:00 2001 From: HF-001 <1670186653@qq.com> Date: Mon, 13 Apr 2026 11:13:02 +0800 Subject: [PATCH 1/2] [fix] fix Ascend310P3 error Signed-off-by: HF-001 <1670186653@qq.com> --- vllm_ascend/ops/triton/gdn_chunk_meta.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/vllm_ascend/ops/triton/gdn_chunk_meta.py b/vllm_ascend/ops/triton/gdn_chunk_meta.py index 7af19b91420..00d7e953956 100644 --- a/vllm_ascend/ops/triton/gdn_chunk_meta.py +++ b/vllm_ascend/ops/triton/gdn_chunk_meta.py @@ -16,6 +16,8 @@ import torch from vllm.triton_utils import tl, triton +from vllm_ascend.utils import is_310p + def _cdiv(x: int, y: int) -> int: triton_cdiv = getattr(triton, "cdiv", None) @@ -156,7 +158,12 @@ def _build_final_chunk_indices( out_final_chunk_indices: torch.Tensor, ) -> None: num_seqs = chunk_counts.shape[0] - if hasattr(_build_final_chunk_indices_kernel, "__getitem__"): + # 310P does not support Triton kernel compilation (bishengir-compile + # cannot target Ascend310P), so always use the PyTorch fallback path. + if ( + not is_310p() + and hasattr(_build_final_chunk_indices_kernel, "__getitem__") + ): block_size = 256 grid = (_cdiv(num_seqs, block_size),) _build_final_chunk_indices_kernel[grid]( From 6479dc8a99426dc7d09ef2d1a7bf65a67f943369 Mon Sep 17 00:00:00 2001 From: HF-001 <1670186653@qq.com> Date: Mon, 13 Apr 2026 11:25:30 +0800 Subject: [PATCH 2/2] [fix] fix Ascend310P3 error Signed-off-by: HF-001 <1670186653@qq.com> --- vllm_ascend/ops/triton/gdn_chunk_meta.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/vllm_ascend/ops/triton/gdn_chunk_meta.py b/vllm_ascend/ops/triton/gdn_chunk_meta.py index 00d7e953956..1a9f5a905b5 100644 --- a/vllm_ascend/ops/triton/gdn_chunk_meta.py +++ b/vllm_ascend/ops/triton/gdn_chunk_meta.py @@ -160,10 +160,7 @@ def _build_final_chunk_indices( num_seqs = chunk_counts.shape[0] # 310P does not support Triton kernel compilation (bishengir-compile # cannot target Ascend310P), so always use the PyTorch fallback path. - if ( - not is_310p() - and hasattr(_build_final_chunk_indices_kernel, "__getitem__") - ): + if not is_310p() and hasattr(_build_final_chunk_indices_kernel, "__getitem__"): block_size = 256 grid = (_cdiv(num_seqs, block_size),) _build_final_chunk_indices_kernel[grid](