From 19c542d796f328f5632d663de4f9ad0004d80930 Mon Sep 17 00:00:00 2001
From: MengqingCao
Date: Mon, 28 Apr 2025 01:49:53 +0000
Subject: [PATCH] [Bugfix] fix triton and minicpm patch

Signed-off-by: MengqingCao
---
 vllm_ascend/patch/__init__.py                      | 13 +---
 .../patch/worker/patch_0_8_4/__init__.py           |  1 -
 .../patch_0_8_4/patch_tritonplaceholder.py         | 68 -------------------
 .../patch/worker/patch_common/__init__.py          |  1 -
 .../patch/worker/patch_main/__init__.py            |  3 +-
 .../patch_minicpm.py                               |  0
 6 files changed, 3 insertions(+), 83 deletions(-)
 delete mode 100644 vllm_ascend/patch/worker/patch_0_8_4/patch_tritonplaceholder.py
 rename vllm_ascend/patch/worker/{patch_common => patch_main}/patch_minicpm.py (100%)

diff --git a/vllm_ascend/patch/__init__.py b/vllm_ascend/patch/__init__.py
index ef0813ae9be..b8aff614e38 100644
--- a/vllm_ascend/patch/__init__.py
+++ b/vllm_ascend/patch/__init__.py
@@ -127,7 +127,7 @@
 #    Future Plan:
 #       Revert it when the related pr is merged in vllm.
 #
-#   ** File: worker/patch_common/patch_minicpm.py **
+#   ** File: worker/patch_main/patch_minicpm.py **
 #    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #   1. `vllm.model_executor.models.minicpm.MiniCPMAttention.forward`
 #    Why:
@@ -173,14 +173,3 @@
 #    Future Plan:
 #       Revert it when the related pr is merged in vllm and vllm-ascend.
 #
-#   ** File: worker/patch_0_8_4/patch_tritonplaceholder.py **
-#    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-#   1. `triton` Module
-#    Why:
-#       Triton is not supported on npu currently, importing triton will break vllm-ascend
-#    How:
-#       ditto
-#    Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit....
-#       TritonPlaceholder is only available in vllm>0.8.4
-#    Future Plan:
-#       Revert it when branch main doesn't maintain v0.8.4.
diff --git a/vllm_ascend/patch/worker/patch_0_8_4/__init__.py b/vllm_ascend/patch/worker/patch_0_8_4/__init__.py
index c822006f2f8..2465b3f5c40 100644
--- a/vllm_ascend/patch/worker/patch_0_8_4/__init__.py
+++ b/vllm_ascend/patch/worker/patch_0_8_4/__init__.py
@@ -16,4 +16,3 @@
 #

 import vllm_ascend.patch.worker.patch_0_8_4.patch_metrics  # noqa
-import vllm_ascend.patch.worker.patch_0_8_4.patch_tritonplaceholder  # noqa
diff --git a/vllm_ascend/patch/worker/patch_0_8_4/patch_tritonplaceholder.py b/vllm_ascend/patch/worker/patch_0_8_4/patch_tritonplaceholder.py
deleted file mode 100644
index cd32cd1dd87..00000000000
--- a/vllm_ascend/patch/worker/patch_0_8_4/patch_tritonplaceholder.py
+++ /dev/null
@@ -1,68 +0,0 @@
-#
-# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
-# Copyright 2023 The vLLM team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# This file is a part of the vllm-ascend project.
-# Adapted from vllm/triton_utils/importing.py
-#
-
-import sys
-import types
-from importlib.util import find_spec
-
-from vllm.logger import logger
-
-HAS_TRITON = (
-    find_spec("triton") is not None
-    or find_spec("pytorch-triton-xpu") is not None  # Not compatible
-)
-
-if not HAS_TRITON:
-    logger.info("Triton not installed or not compatible; certain GPU-related"
-                " functions will not be available.")
-
-    class TritonPlaceholder(types.ModuleType):
-
-        def __init__(self):
-            super().__init__("triton")
-            self.jit = self._dummy_decorator("jit")
-            self.autotune = self._dummy_decorator("autotune")
-            self.heuristics = self._dummy_decorator("heuristics")
-            self.language = TritonLanguagePlaceholder()
-            logger.warning_once(
-                "Triton is not installed. Using dummy decorators. "
-                "Install it via `pip install triton` to enable kernel"
-                " compilation.")
-
-        def _dummy_decorator(self, name):
-
-            def decorator(func=None, **kwargs):
-                if func is None:
-                    return lambda f: f
-                return func
-
-            return decorator
-
-    class TritonLanguagePlaceholder(types.ModuleType):
-
-        def __init__(self):
-            super().__init__("triton.language")
-            self.constexpr = None
-            self.dtype = None
-
-    sys.modules['triton'] = TritonPlaceholder()
-    sys.modules['triton.language'] = TritonLanguagePlaceholder()
-
-if 'triton' in sys.modules:
-    logger.info("Triton module has been replaced with a placeholder.")
diff --git a/vllm_ascend/patch/worker/patch_common/__init__.py b/vllm_ascend/patch/worker/patch_common/__init__.py
index 2ba9917cc46..590074fe26b 100644
--- a/vllm_ascend/patch/worker/patch_common/__init__.py
+++ b/vllm_ascend/patch/worker/patch_common/__init__.py
@@ -16,6 +16,5 @@
 #

 import vllm_ascend.patch.worker.patch_common.patch_metrics  # noqa
-import vllm_ascend.patch.worker.patch_common.patch_minicpm  # noqa
 import vllm_ascend.patch.worker.patch_common.patch_multi_step_worker  # noqa
 import vllm_ascend.patch.worker.patch_common.patch_spec_decode_worker  # noqa
diff --git a/vllm_ascend/patch/worker/patch_main/__init__.py b/vllm_ascend/patch/worker/patch_main/__init__.py
index 2ed088b746d..a7536c4e810 100644
--- a/vllm_ascend/patch/worker/patch_main/__init__.py
+++ b/vllm_ascend/patch/worker/patch_main/__init__.py
@@ -13,4 +13,5 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
\ No newline at end of file
+#
+import vllm_ascend.patch.worker.patch_main.patch_minicpm  # noqa
diff --git a/vllm_ascend/patch/worker/patch_common/patch_minicpm.py b/vllm_ascend/patch/worker/patch_main/patch_minicpm.py
similarity index 100%
rename from vllm_ascend/patch/worker/patch_common/patch_minicpm.py
rename to vllm_ascend/patch/worker/patch_main/patch_minicpm.py
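
Note: the rename above moves patch_minicpm.py verbatim (100% similarity), so its contents do not appear in the diff. For readers unfamiliar with how these worker patches hook into vLLM, the sketch below shows the import-time monkey-patch pattern such a module typically relies on. It is a minimal illustration, not the actual vllm-ascend code: it assumes only that vllm is installed and exposes vllm.model_executor.models.minicpm.MiniCPMAttention (the symbol referenced in the comment block patched above), and it merely wraps the original forward instead of reproducing the real NPU-specific implementation.

    # sketch_patch_minicpm.py -- hypothetical stand-in, not the vllm-ascend module
    from vllm.model_executor.models.minicpm import MiniCPMAttention

    _original_forward = MiniCPMAttention.forward


    def _patched_forward(self, *args, **kwargs):
        # A real patch would swap in an NPU-friendly rotary/attention path here;
        # this sketch simply delegates to the original implementation.
        return _original_forward(self, *args, **kwargs)


    # Rebinding the class attribute is the whole mechanism: importing this module
    # (as patch_main/__init__.py now imports patch_minicpm) makes every
    # MiniCPMAttention instance resolve forward() to the patched function.
    MiniCPMAttention.forward = _patched_forward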