From 58d78f76700561c216c0b418ba382b46715138af Mon Sep 17 00:00:00 2001 From: Li-8916 Date: Sat, 28 Mar 2026 01:15:44 +0000 Subject: [PATCH 1/7] cos/sin shape already is [s,d], just use rotary_position_embedding directly Signed-off-by: Li-8916 --- vllm_omni/diffusion/layers/rope.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vllm_omni/diffusion/layers/rope.py b/vllm_omni/diffusion/layers/rope.py index 65d37d0b01..31fb547f6d 100644 --- a/vllm_omni/diffusion/layers/rope.py +++ b/vllm_omni/diffusion/layers/rope.py @@ -49,7 +49,9 @@ def apply_rotary_emb_mindiesd( # (B, S, D/2) -> (S, D/2) cos = cos[0] sin = sin[0] - + if cos.shape[-1] == x.shape[-1]: + half_head_dim = False + # already expanded to (S, D), just use directly if interleaved: # if last dim of sin and cos is D/2, expand to (S, D) to adapt to mindiesd operators if half_head_dim: From eb375af59239c23698c414b637f1dc436d98fb6b Mon Sep 17 00:00:00 2001 From: Li-8916 Date: Tue, 31 Mar 2026 11:58:21 +0800 Subject: [PATCH 2/7] Update rope.py Signed-off-by: Li-8916 --- vllm_omni/diffusion/layers/rope.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm_omni/diffusion/layers/rope.py b/vllm_omni/diffusion/layers/rope.py index 31fb547f6d..0b4d18bfc3 100644 --- a/vllm_omni/diffusion/layers/rope.py +++ b/vllm_omni/diffusion/layers/rope.py @@ -51,7 +51,7 @@ def apply_rotary_emb_mindiesd( sin = sin[0] if cos.shape[-1] == x.shape[-1]: half_head_dim = False - # already expanded to (S, D), just use directly + # already expanded to (S, D), just use directly if interleaved: # if last dim of sin and cos is D/2, expand to (S, D) to adapt to mindiesd operators if half_head_dim: From a25f1311b5d56c400931f83a0dd233edbd68cd1e Mon Sep 17 00:00:00 2001 From: Li-8916 Date: Tue, 31 Mar 2026 09:39:42 +0000 Subject: [PATCH 3/7] optimize rope on NPU Signed-off-by: Li-8916 Co-authored-by: vasede <1399968934@qq.com> --- .../diffusion/models/wan2_2/wan2_2_transformer.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/vllm_omni/diffusion/models/wan2_2/wan2_2_transformer.py b/vllm_omni/diffusion/models/wan2_2/wan2_2_transformer.py index a4ae3118a7..9996400f82 100644 --- a/vllm_omni/diffusion/models/wan2_2/wan2_2_transformer.py +++ b/vllm_omni/diffusion/models/wan2_2/wan2_2_transformer.py @@ -30,6 +30,10 @@ ) from vllm_omni.diffusion.forward_context import get_forward_context +from importlib.util import find_spec +from vllm_omni.diffusion.layers.rope import RotaryEmbedding +from vllm_omni.platforms import current_omni_platform + logger = init_logger(__name__) @@ -392,6 +396,7 @@ def __init__( softmax_scale=1.0 / (head_dim**0.5), causal=False, ) + self.rope = RotaryEmbedding(is_neox_style=False) def forward( self, @@ -418,8 +423,12 @@ def forward( # Apply rotary embeddings if rotary_emb is not None: freqs_cos, freqs_sin = rotary_emb - query = apply_rotary_emb_wan(query, freqs_cos, freqs_sin) - key = apply_rotary_emb_wan(key, freqs_cos, freqs_sin) + if find_spec("mindiesd") is not None and current_omni_platform.is_npu(): + query = self.rope(query,freqs_cos, freqs_sin) + key = self.rope(key,freqs_cos, freqs_sin) + else: + query = apply_rotary_emb_wan(query, freqs_cos, freqs_sin) + key = apply_rotary_emb_wan(key, freqs_cos, freqs_sin) # Create attention metadata if mask is provided attn_metadata = None From 38a794151f666942a314c90c4526e2bd6eae823c Mon Sep 17 00:00:00 2001 From: Li-8916 Date: Tue, 31 Mar 2026 19:18:22 +0800 Subject: [PATCH 4/7] update rotary Signed-off-by: Li-8916 --- vllm_omni/diffusion/models/wan2_2/wan2_2_transformer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm_omni/diffusion/models/wan2_2/wan2_2_transformer.py b/vllm_omni/diffusion/models/wan2_2/wan2_2_transformer.py index 9996400f82..19626b6bda 100644 --- a/vllm_omni/diffusion/models/wan2_2/wan2_2_transformer.py +++ b/vllm_omni/diffusion/models/wan2_2/wan2_2_transformer.py @@ -424,8 +424,8 @@ def forward( if rotary_emb is not None: freqs_cos, freqs_sin = rotary_emb if find_spec("mindiesd") is not None and current_omni_platform.is_npu(): - query = self.rope(query,freqs_cos, freqs_sin) - key = self.rope(key,freqs_cos, freqs_sin) + query = self.rope(query, freqs_cos, freqs_sin) + key = self.rope(key, freqs_cos, freqs_sin) else: query = apply_rotary_emb_wan(query, freqs_cos, freqs_sin) key = apply_rotary_emb_wan(key, freqs_cos, freqs_sin) From b91cbf257bc80fa8c8e72a95280b832a890c4139 Mon Sep 17 00:00:00 2001 From: Li-8916 Date: Tue, 31 Mar 2026 19:25:43 +0800 Subject: [PATCH 5/7] Reorganize import statements in wan2_2_transformer.py Moved the import statement for find_spec to a new location. Signed-off-by: Li-8916 --- vllm_omni/diffusion/models/wan2_2/wan2_2_transformer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm_omni/diffusion/models/wan2_2/wan2_2_transformer.py b/vllm_omni/diffusion/models/wan2_2/wan2_2_transformer.py index 19626b6bda..8028ac3917 100644 --- a/vllm_omni/diffusion/models/wan2_2/wan2_2_transformer.py +++ b/vllm_omni/diffusion/models/wan2_2/wan2_2_transformer.py @@ -3,6 +3,7 @@ import math from collections.abc import Iterable +from importlib.util import find_spec from typing import Any import torch @@ -30,7 +31,6 @@ ) from vllm_omni.diffusion.forward_context import get_forward_context -from importlib.util import find_spec from vllm_omni.diffusion.layers.rope import RotaryEmbedding from vllm_omni.platforms import current_omni_platform From e09cdb01fe018596219ecc270532f52b9499d919 Mon Sep 17 00:00:00 2001 From: Li-8916 Date: Tue, 31 Mar 2026 19:28:10 +0800 Subject: [PATCH 6/7] Remove unnecessary blank line in wan2_2_transformer.py Signed-off-by: Li-8916 --- vllm_omni/diffusion/models/wan2_2/wan2_2_transformer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vllm_omni/diffusion/models/wan2_2/wan2_2_transformer.py b/vllm_omni/diffusion/models/wan2_2/wan2_2_transformer.py index 8028ac3917..4c41b60425 100644 --- a/vllm_omni/diffusion/models/wan2_2/wan2_2_transformer.py +++ b/vllm_omni/diffusion/models/wan2_2/wan2_2_transformer.py @@ -30,7 +30,6 @@ SequenceParallelOutput, ) from vllm_omni.diffusion.forward_context import get_forward_context - from vllm_omni.diffusion.layers.rope import RotaryEmbedding from vllm_omni.platforms import current_omni_platform From bc28e90aa85f0438dc00ce52b439dea69ecb4c9a Mon Sep 17 00:00:00 2001 From: Li-8916 Date: Tue, 31 Mar 2026 19:29:07 +0800 Subject: [PATCH 7/7] Remove unnecessary line in rope.py Signed-off-by: Li-8916 --- vllm_omni/diffusion/layers/rope.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vllm_omni/diffusion/layers/rope.py b/vllm_omni/diffusion/layers/rope.py index 0b4d18bfc3..11204cf565 100644 --- a/vllm_omni/diffusion/layers/rope.py +++ b/vllm_omni/diffusion/layers/rope.py @@ -99,7 +99,6 @@ def forward_cuda( # (B, S, D/2) -> (S, D/2) cos = cos[0] sin = sin[0] - return apply_rotary_emb( x, cos,