From 64fabac31dd5916063fb913793d890e3caa3526e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E5=B0=91=E9=B9=8F?= Date: Thu, 27 Nov 2025 11:35:00 +0800 Subject: [PATCH 1/2] fix qwen3vl mrope op MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 李少鹏 --- vllm_ascend/ops/rotary_embedding.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/vllm_ascend/ops/rotary_embedding.py b/vllm_ascend/ops/rotary_embedding.py index 91a6f09fa1a..e3078dba0d5 100644 --- a/vllm_ascend/ops/rotary_embedding.py +++ b/vllm_ascend/ops/rotary_embedding.py @@ -24,7 +24,6 @@ from vllm.model_executor.layers.rotary_embedding import ( DeepseekScalingRotaryEmbedding, MRotaryEmbedding, RotaryEmbedding, YaRNScalingRotaryEmbedding) -from vllm.platforms import CpuArchEnum from vllm_ascend.platform import NPUPlatform from vllm_ascend.utils import (AscendDeviceType, enable_custom_op, @@ -409,9 +408,7 @@ def forward_oot( key: torch.Tensor, ): # TODO: This judgment will be removed once the mrope precision issue is fixed - if self.mrope_section != [ - 16, 24, 24 - ] or NPUPlatform.get_cpu_architecture() == CpuArchEnum.X86: + if self.mrope_section != [16, 24, 24]: return super().forward_oot(positions, query, key) import torch_npu @@ -426,7 +423,7 @@ def forward_oot( self.cos_sin_cache = self.cos_sin_cache.to( # type: ignore query.dtype) # type: ignore - query, key = torch_npu.npu_mrope(positions, + query, key = torch_npu.npu_mrope(positions.contiguous(), query.contiguous(), key.contiguous(), self.cos_sin_cache.contiguous(), From 88f1c733888eb53c0fd20bf19b7c8323381a2fb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E5=B0=91=E9=B9=8F?= Date: Fri, 28 Nov 2025 10:20:47 +0800 Subject: [PATCH 2/2] fix qwen3vl mrope op MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 李少鹏 --- vllm_ascend/ops/rotary_embedding.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vllm_ascend/ops/rotary_embedding.py b/vllm_ascend/ops/rotary_embedding.py index e3078dba0d5..a4dd1ce06dc 100644 --- a/vllm_ascend/ops/rotary_embedding.py +++ b/vllm_ascend/ops/rotary_embedding.py @@ -407,7 +407,6 @@ def forward_oot( query: torch.Tensor, key: torch.Tensor, ): - # TODO: This judgment will be removed once the mrope precision issue is fixed if self.mrope_section != [16, 24, 24]: return super().forward_oot(positions, query, key)