From e4a6e8e52248ed15a3fccdb9e8e7cf5713f57325 Mon Sep 17 00:00:00 2001
From: HF-001 <1670186653@qq.com>
Date: Mon, 13 Apr 2026 11:13:02 +0800
Subject: [PATCH 1/2] [fix] Skip Triton kernel in gdn_chunk_meta on Ascend 310P

Signed-off-by: HF-001 <1670186653@qq.com>
---
 vllm_ascend/ops/triton/gdn_chunk_meta.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/vllm_ascend/ops/triton/gdn_chunk_meta.py b/vllm_ascend/ops/triton/gdn_chunk_meta.py
index 7af19b91420..00d7e953956 100644
--- a/vllm_ascend/ops/triton/gdn_chunk_meta.py
+++ b/vllm_ascend/ops/triton/gdn_chunk_meta.py
@@ -16,6 +16,8 @@
 import torch
 from vllm.triton_utils import tl, triton
 
+from vllm_ascend.utils import is_310p
+
 
 def _cdiv(x: int, y: int) -> int:
     triton_cdiv = getattr(triton, "cdiv", None)
@@ -156,7 +158,12 @@ def _build_final_chunk_indices(
     out_final_chunk_indices: torch.Tensor,
 ) -> None:
     num_seqs = chunk_counts.shape[0]
-    if hasattr(_build_final_chunk_indices_kernel, "__getitem__"):
+    # 310P does not support Triton kernel compilation (bishengir-compile
+    # cannot target Ascend310P), so always use the PyTorch fallback path.
+    if (
+        not is_310p()
+        and hasattr(_build_final_chunk_indices_kernel, "__getitem__")
+    ):
         block_size = 256
         grid = (_cdiv(num_seqs, block_size),)
         _build_final_chunk_indices_kernel[grid](

From 6479dc8a99426dc7d09ef2d1a7bf65a67f943369 Mon Sep 17 00:00:00 2001
From: HF-001 <1670186653@qq.com>
Date: Mon, 13 Apr 2026 11:25:30 +0800
Subject: [PATCH 2/2] [fix] Collapse the 310P Triton guard onto one line

Signed-off-by: HF-001 <1670186653@qq.com>
---
 vllm_ascend/ops/triton/gdn_chunk_meta.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/vllm_ascend/ops/triton/gdn_chunk_meta.py b/vllm_ascend/ops/triton/gdn_chunk_meta.py
index 00d7e953956..1a9f5a905b5 100644
--- a/vllm_ascend/ops/triton/gdn_chunk_meta.py
+++ b/vllm_ascend/ops/triton/gdn_chunk_meta.py
@@ -160,10 +160,7 @@ def _build_final_chunk_indices(
     num_seqs = chunk_counts.shape[0]
     # 310P does not support Triton kernel compilation (bishengir-compile
     # cannot target Ascend310P), so always use the PyTorch fallback path.
-    if (
-        not is_310p()
-        and hasattr(_build_final_chunk_indices_kernel, "__getitem__")
-    ):
+    if not is_310p() and hasattr(_build_final_chunk_indices_kernel, "__getitem__"):
         block_size = 256
         grid = (_cdiv(num_seqs, block_size),)
         _build_final_chunk_indices_kernel[grid](