From 19c542d796f328f5632d663de4f9ad0004d80930 Mon Sep 17 00:00:00 2001
From: MengqingCao
Date: Mon, 28 Apr 2025 01:49:53 +0000
Subject: [PATCH] [Bugfix] fix triton and minicpm patch

Signed-off-by: MengqingCao
---
 vllm_ascend/patch/__init__.py                      | 13 +---
 .../patch/worker/patch_0_8_4/__init__.py           |  1 -
 .../patch_0_8_4/patch_tritonplaceholder.py         | 68 -------------------
 .../patch/worker/patch_common/__init__.py          |  1 -
 .../patch/worker/patch_main/__init__.py            |  3 +-
 .../patch_minicpm.py                               |  0
 6 files changed, 3 insertions(+), 83 deletions(-)
 delete mode 100644 vllm_ascend/patch/worker/patch_0_8_4/patch_tritonplaceholder.py
 rename vllm_ascend/patch/worker/{patch_common => patch_main}/patch_minicpm.py (100%)

diff --git a/vllm_ascend/patch/__init__.py b/vllm_ascend/patch/__init__.py
index ef0813ae9be..b8aff614e38 100644
--- a/vllm_ascend/patch/__init__.py
+++ b/vllm_ascend/patch/__init__.py
@@ -127,7 +127,7 @@
 #    Future Plan:
 #       Revert it when the related pr is merged in vllm.
 #
-#   ** File: worker/patch_common/patch_minicpm.py **
+#   ** File: worker/patch_main/patch_minicpm.py **
 #    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #   1. `vllm.model_executor.models.minicpm.MiniCPMAttention.forward`
 #    Why:
@@ -173,14 +173,3 @@
 #    Future Plan:
 #       Revert it when the related pr is merged in vllm and vllm-ascend.
 #
-#   ** File: worker/patch_0_8_4/patch_tritonplaceholder.py **
-#    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-#   1. `triton` Module
-#    Why:
-#       Triton is not supported on npu currently, importing triton will break vllm-ascend
-#    How:
-#       ditto
-#    Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit....
-#       TritonPlaceholder is only available in vllm>0.8.4
-#    Future Plan:
-#       Revert it when branch main doesn't maintain v0.8.4.
diff --git a/vllm_ascend/patch/worker/patch_0_8_4/__init__.py b/vllm_ascend/patch/worker/patch_0_8_4/__init__.py
index c822006f2f8..2465b3f5c40 100644
--- a/vllm_ascend/patch/worker/patch_0_8_4/__init__.py
+++ b/vllm_ascend/patch/worker/patch_0_8_4/__init__.py
@@ -16,4 +16,3 @@
 #

 import vllm_ascend.patch.worker.patch_0_8_4.patch_metrics  # noqa
-import vllm_ascend.patch.worker.patch_0_8_4.patch_tritonplaceholder  # noqa
diff --git a/vllm_ascend/patch/worker/patch_0_8_4/patch_tritonplaceholder.py b/vllm_ascend/patch/worker/patch_0_8_4/patch_tritonplaceholder.py
deleted file mode 100644
index cd32cd1dd87..00000000000
--- a/vllm_ascend/patch/worker/patch_0_8_4/patch_tritonplaceholder.py
+++ /dev/null
@@ -1,68 +0,0 @@
-#
-# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
-# Copyright 2023 The vLLM team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# This file is a part of the vllm-ascend project.
-# Adapted from vllm/triton_utils/importing.py
-#
-
-import sys
-import types
-from importlib.util import find_spec
-
-from vllm.logger import logger
-
-HAS_TRITON = (
-    find_spec("triton") is not None
-    or find_spec("pytorch-triton-xpu") is not None  # Not compatible
-)
-
-if not HAS_TRITON:
-    logger.info("Triton not installed or not compatible; certain GPU-related"
-                " functions will not be available.")
-
-    class TritonPlaceholder(types.ModuleType):
-
-        def __init__(self):
-            super().__init__("triton")
-            self.jit = self._dummy_decorator("jit")
-            self.autotune = self._dummy_decorator("autotune")
-            self.heuristics = self._dummy_decorator("heuristics")
-            self.language = TritonLanguagePlaceholder()
-            logger.warning_once(
-                "Triton is not installed. Using dummy decorators. "
-                "Install it via `pip install triton` to enable kernel"
-                " compilation.")
-
-        def _dummy_decorator(self, name):
-
-            def decorator(func=None, **kwargs):
-                if func is None:
-                    return lambda f: f
-                return func
-
-            return decorator
-
-    class TritonLanguagePlaceholder(types.ModuleType):
-
-        def __init__(self):
-            super().__init__("triton.language")
-            self.constexpr = None
-            self.dtype = None
-
-    sys.modules['triton'] = TritonPlaceholder()
-    sys.modules['triton.language'] = TritonLanguagePlaceholder()
-
-if 'triton' in sys.modules:
-    logger.info("Triton module has been replaced with a placeholder.")
diff --git a/vllm_ascend/patch/worker/patch_common/__init__.py b/vllm_ascend/patch/worker/patch_common/__init__.py
index 2ba9917cc46..590074fe26b 100644
--- a/vllm_ascend/patch/worker/patch_common/__init__.py
+++ b/vllm_ascend/patch/worker/patch_common/__init__.py
@@ -16,6 +16,5 @@
 #

 import vllm_ascend.patch.worker.patch_common.patch_metrics  # noqa
-import vllm_ascend.patch.worker.patch_common.patch_minicpm  # noqa
 import vllm_ascend.patch.worker.patch_common.patch_multi_step_worker  # noqa
 import vllm_ascend.patch.worker.patch_common.patch_spec_decode_worker  # noqa
diff --git a/vllm_ascend/patch/worker/patch_main/__init__.py b/vllm_ascend/patch/worker/patch_main/__init__.py
index 2ed088b746d..a7536c4e810 100644
--- a/vllm_ascend/patch/worker/patch_main/__init__.py
+++ b/vllm_ascend/patch/worker/patch_main/__init__.py
@@ -13,4 +13,5 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
\ No newline at end of file
+#
+import vllm_ascend.patch.worker.patch_main.patch_minicpm  # noqa
diff --git a/vllm_ascend/patch/worker/patch_common/patch_minicpm.py b/vllm_ascend/patch/worker/patch_main/patch_minicpm.py
similarity index 100%
rename from vllm_ascend/patch/worker/patch_common/patch_minicpm.py
rename to vllm_ascend/patch/worker/patch_main/patch_minicpm.py
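
Note: the rename above moves patch_minicpm.py verbatim (100% similarity), so its contents do not appear in the diff. For readers unfamiliar with how these worker patches hook into vLLM, the sketch below shows the import-time monkey-patch pattern such a module typically relies on. It is a minimal illustration, not the actual vllm-ascend code: it assumes only that vllm is installed and exposes vllm.model_executor.models.minicpm.MiniCPMAttention (the symbol referenced in the comment block patched above), and it merely wraps the original forward instead of reproducing the real NPU-specific implementation.

    # sketch_patch_minicpm.py -- hypothetical stand-in, not the vllm-ascend module
    from vllm.model_executor.models.minicpm import MiniCPMAttention

    _original_forward = MiniCPMAttention.forward


    def _patched_forward(self, *args, **kwargs):
        # A real patch would swap in an NPU-friendly rotary/attention path here;
        # this sketch simply delegates to the original implementation.
        return _original_forward(self, *args, **kwargs)


    # Rebinding the class attribute is the whole mechanism: importing this module
    # (as patch_main/__init__.py now imports patch_minicpm) makes every
    # MiniCPMAttention instance resolve forward() to the patched function.
    MiniCPMAttention.forward = _patched_forward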