diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md index 67a49c5755c..52e75989d2f 100644 --- a/docs/models/supported_models.md +++ b/docs/models/supported_models.md @@ -65,6 +65,7 @@ th { |--------------|--------|-------------------| | `Qwen3OmniMoeForConditionalGeneration` | Qwen3-Omni | `Qwen/Qwen3-Omni-30B-A3B-Instruct` | | `Qwen2_5OmniForConditionalGeneration` | Qwen2.5-Omni | `Qwen/Qwen2.5-Omni-7B`, `Qwen/Qwen2.5-Omni-3B`| +| `HunyuanImage3ForCausalMM` | HunyuanImage3.0 (DiT-only) | `tencent/HunyuanImage-3.0`, `tencent/HunyuanImage-3.0-Instruct` | | `QwenImagePipeline` | Qwen-Image | `Qwen/Qwen-Image` | | `QwenImagePipeline` | Qwen-Image-2512 | `Qwen/Qwen-Image-2512` | | `QwenImageEditPipeline` | Qwen-Image-Edit | `Qwen/Qwen-Image-Edit` | diff --git a/tests/diffusion/models/hunyuan_image_3/test_hunyuan_fused_moe.py b/tests/diffusion/models/hunyuan_image_3/test_hunyuan_fused_moe.py new file mode 100644 index 00000000000..2aa1adf4449 --- /dev/null +++ b/tests/diffusion/models/hunyuan_image_3/test_hunyuan_fused_moe.py @@ -0,0 +1,112 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +"""Unit tests for HunyuanFusedMoE (Support HunyuanImage3 Diffusion Model, 5a779b4).""" + +import pytest + +pytestmark = [pytest.mark.core_model, pytest.mark.cpu] + + +class TestHunyuanFusedMoEPlatformDispatch: + """Test platform dispatch via platform qualname hooks.""" + + def test_default_platform_uses_default_impl_qualname(self, mocker): + """HunyuanFusedMoE should resolve the impl class from the platform hook.""" + import vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe as hunyuan_moe + + mock_platform = mocker.MagicMock() + mock_platform.get_diffusion_model_impl_qualname.return_value = ( + "vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe.HunyuanFusedMoEDefault" + ) + + mocker.patch.object( + hunyuan_moe, + "current_omni_platform", + mock_platform, + ) + mock_resolve = mocker.patch.object(hunyuan_moe, "resolve_obj_by_qualname") + mock_impl = mocker.MagicMock() + mock_resolve.return_value = mock_impl + + from vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe import ( + HunyuanFusedMoE, + ) + + HunyuanFusedMoE(prefix="") + + mock_platform.prepare_diffusion_op_runtime.assert_called_once_with("hunyuan_fused_moe") + mock_platform.get_diffusion_model_impl_qualname.assert_called_once_with("hunyuan_fused_moe") + mock_resolve.assert_called_once_with( + "vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe.HunyuanFusedMoEDefault" + ) + mock_impl.assert_called_once_with(prefix="") + + +class TestHunyuanFusedMoEFactory: + """Test HunyuanFusedMoE factory __new__ and make_expert_params_mapping delegation.""" + + def test_new_delegates_to_impl_class(self, mocker): + """HunyuanFusedMoE(prefix=..., **kwargs) should instantiate and return impl instance.""" + import vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe as hunyuan_moe + + class MockImpl: + def __init__(self, *, prefix: str = "", **kwargs): + self.prefix = prefix + self.kwargs = kwargs + + mock_platform = mocker.MagicMock() + mock_platform.get_diffusion_model_impl_qualname.return_value = "mock.impl.Qualname" + mocker.patch.object(hunyuan_moe, "current_omni_platform", mock_platform) + + mock_impl_class = mocker.MagicMock(return_value=MockImpl(prefix="test", a=1)) + mocker.patch.object(hunyuan_moe, "resolve_obj_by_qualname", return_value=mock_impl_class) + + from vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe import ( + HunyuanFusedMoE, + ) + + result = HunyuanFusedMoE(prefix="test", a=1) + + assert isinstance(result, MockImpl) + assert result.prefix == "test" + assert result.kwargs == {"a": 1} + mock_platform.prepare_diffusion_op_runtime.assert_called_once_with("hunyuan_fused_moe") + mock_platform.get_diffusion_model_impl_qualname.assert_called_once_with("hunyuan_fused_moe") + mock_impl_class.assert_called_once_with(prefix="test", a=1) + + def test_make_expert_params_mapping_delegates_to_impl(self, mocker): + """make_expert_params_mapping should delegate to impl class method.""" + import vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe as hunyuan_moe + + expected_mapping = [("a", "b", 0, "c")] + mock_platform = mocker.MagicMock() + mock_platform.get_diffusion_model_impl_qualname.return_value = "mock.impl.Qualname" + mocker.patch.object(hunyuan_moe, "current_omni_platform", mock_platform) + + mock_impl_class = mocker.MagicMock() + mock_impl_class.make_expert_params_mapping = mocker.MagicMock(return_value=expected_mapping) + mocker.patch.object(hunyuan_moe, "resolve_obj_by_qualname", return_value=mock_impl_class) + + from vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe import ( + HunyuanFusedMoE, + ) + + result = HunyuanFusedMoE.make_expert_params_mapping( + model=None, + ckpt_gate_proj_name="gate", + ckpt_down_proj_name="down", + ckpt_up_proj_name="up", + num_experts=4, + num_redundant_experts=0, + ) + + assert result == expected_mapping + mock_platform.get_diffusion_model_impl_qualname.assert_called_once_with("hunyuan_fused_moe") + mock_impl_class.make_expert_params_mapping.assert_called_once_with( + None, + ckpt_gate_proj_name="gate", + ckpt_down_proj_name="down", + ckpt_up_proj_name="up", + num_experts=4, + num_redundant_experts=0, + ) diff --git a/tests/diffusion/test_data_is_moe.py b/tests/diffusion/test_data_is_moe.py new file mode 100644 index 00000000000..25fa59ef1db --- /dev/null +++ b/tests/diffusion/test_data_is_moe.py @@ -0,0 +1,78 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +"""Unit tests for OmniDiffusionConfig.is_moe (fix is_moe type and threshold, 6663c0b).""" + +import pytest + +from vllm_omni.diffusion.data import OmniDiffusionConfig, TransformerConfig + +pytestmark = [pytest.mark.core_model, pytest.mark.cpu] + + +class TestOmniDiffusionConfigIsMoE: + """Tests for OmniDiffusionConfig.is_moe property. + + Covers commit 6663c0b: fix is_moe type and threshold + - num_experts must be (list, tuple, int); otherwise return False. + - Threshold: is_moe is True when num_experts > 0 (not > 1). + """ + + def test_is_moe_missing_num_experts_returns_false(self): + """When num_experts is absent, is_moe should be False.""" + tf_config = TransformerConfig.from_dict({}) + config = OmniDiffusionConfig(model="test", tf_model_config=tf_config) + assert config.is_moe is False + + def test_is_moe_none_num_experts_returns_false(self): + """When num_experts is explicitly None (e.g. in params), is_moe should be False.""" + tf_config = TransformerConfig.from_dict({"num_experts": None}) + config = OmniDiffusionConfig(model="test", tf_model_config=tf_config) + assert config.is_moe is False + + def test_is_moe_non_allowed_type_returns_false(self): + """When num_experts is not int/list/tuple (e.g. str), is_moe should be False.""" + tf_config = TransformerConfig.from_dict({"num_experts": "2"}) + config = OmniDiffusionConfig(model="test", tf_model_config=tf_config) + assert config.is_moe is False + + def test_is_moe_int_zero_returns_false(self): + """num_experts int 0 should yield is_moe False (threshold > 0).""" + tf_config = TransformerConfig.from_dict({"num_experts": 0}) + config = OmniDiffusionConfig(model="test", tf_model_config=tf_config) + assert config.is_moe is False + + def test_is_moe_int_one_returns_true(self): + """num_experts int 1 should yield is_moe True (threshold > 0, not > 1).""" + tf_config = TransformerConfig.from_dict({"num_experts": 1}) + config = OmniDiffusionConfig(model="test", tf_model_config=tf_config) + assert config.is_moe is True + + def test_is_moe_int_gt_one_returns_true(self): + """num_experts int > 1 should yield is_moe True.""" + tf_config = TransformerConfig.from_dict({"num_experts": 2}) + config = OmniDiffusionConfig(model="test", tf_model_config=tf_config) + assert config.is_moe is True + + def test_is_moe_list_all_zero_returns_false(self): + """num_experts list with all <= 0 should yield is_moe False.""" + tf_config = TransformerConfig.from_dict({"num_experts": [0]}) + config = OmniDiffusionConfig(model="test", tf_model_config=tf_config) + assert config.is_moe is False + + def test_is_moe_list_has_positive_returns_true(self): + """num_experts list with any int > 0 should yield is_moe True.""" + tf_config = TransformerConfig.from_dict({"num_experts": [0, 1]}) + config = OmniDiffusionConfig(model="test", tf_model_config=tf_config) + assert config.is_moe is True + + def test_is_moe_tuple_has_positive_returns_true(self): + """num_experts tuple with any int > 0 should yield is_moe True.""" + tf_config = TransformerConfig.from_dict({"num_experts": (0, 2)}) + config = OmniDiffusionConfig(model="test", tf_model_config=tf_config) + assert config.is_moe is True + + def test_is_moe_list_non_int_ignored(self): + """num_experts list with only non-int entries should yield is_moe False.""" + tf_config = TransformerConfig.from_dict({"num_experts": ["a", 0.0]}) + config = OmniDiffusionConfig(model="test", tf_model_config=tf_config) + assert config.is_moe is False diff --git a/vllm_omni/diffusion/data.py b/vllm_omni/diffusion/data.py index 38366469eb1..634cb329414 100644 --- a/vllm_omni/diffusion/data.py +++ b/vllm_omni/diffusion/data.py @@ -463,11 +463,13 @@ class OmniDiffusionConfig: @property def is_moe(self) -> bool: num_experts = self.tf_model_config.get("num_experts", None) + if not isinstance(num_experts, (list, tuple, int)): + return False if isinstance(num_experts, int): - return num_experts > 1 + return num_experts > 0 if isinstance(num_experts, (list, tuple)): - return any(isinstance(n, int) and n > 1 for n in num_experts) + return any(isinstance(n, int) and n > 0 for n in num_experts) return False diff --git a/vllm_omni/diffusion/models/hunyuan_image_3/__init__.py b/vllm_omni/diffusion/models/hunyuan_image_3/__init__.py index 98a3ac07b1c..cbc6a8ad1f4 100644 --- a/vllm_omni/diffusion/models/hunyuan_image_3/__init__.py +++ b/vllm_omni/diffusion/models/hunyuan_image_3/__init__.py @@ -2,6 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project """Hunyuan Image 3 diffusion model components.""" +from vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe import HunyuanFusedMoE from vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_image_3_transformer import ( HunyuanImage3Model, HunyuanImage3Text2ImagePipeline, @@ -10,8 +11,4 @@ HunyuanImage3Pipeline, ) -__all__ = [ - "HunyuanImage3Pipeline", - "HunyuanImage3Model", - "HunyuanImage3Text2ImagePipeline", -] +__all__ = ["HunyuanImage3Pipeline", "HunyuanImage3Model", "HunyuanImage3Text2ImagePipeline", "HunyuanFusedMoE"] diff --git a/vllm_omni/diffusion/models/hunyuan_image_3/hunyuan_fused_moe.py b/vllm_omni/diffusion/models/hunyuan_image_3/hunyuan_fused_moe.py new file mode 100644 index 00000000000..5ada5ceb848 --- /dev/null +++ b/vllm_omni/diffusion/models/hunyuan_image_3/hunyuan_fused_moe.py @@ -0,0 +1,56 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +from typing import Any + +from vllm.model_executor.layers.fused_moe import SharedFusedMoE +from vllm.utils.import_utils import resolve_obj_by_qualname + +from vllm_omni.platforms import current_omni_platform + + +class HunyuanFusedMoEDefault(SharedFusedMoE): + def __init__(self, *, prefix: str = "", **kwargs: Any) -> None: + super().__init__(prefix=prefix, **kwargs) + self._prefix = prefix + self._init_hook_handle = self.register_forward_pre_hook(self._initialize_kernel_hook, with_kwargs=True) + + def _initialize_kernel_hook(self, module: Any, args: Any, kwargs: Any) -> None: + if self.quant_method: + self.quant_method.process_weights_after_loading(self) + self._init_hook_handle.remove() + + def forward(self, hidden_states: Any, router_logits: Any) -> Any: + return super().forward(hidden_states, router_logits) + + +class HunyuanFusedMoE: + def __new__(cls, *, prefix: str = "", **kwargs: Any) -> Any: + op_name = "hunyuan_fused_moe" + current_omni_platform.prepare_diffusion_op_runtime(op_name) + impl = resolve_obj_by_qualname( + current_omni_platform.get_diffusion_model_impl_qualname(op_name), + ) + return impl(prefix=prefix, **kwargs) + + @classmethod + def make_expert_params_mapping( + cls, + model: Any, + ckpt_gate_proj_name: str, + ckpt_down_proj_name: str, + ckpt_up_proj_name: str, + num_experts: int, + num_redundant_experts: int = 0, + ) -> list[tuple[str, str, int, str]]: + impl = resolve_obj_by_qualname( + current_omni_platform.get_diffusion_model_impl_qualname("hunyuan_fused_moe"), + ) + return impl.make_expert_params_mapping( + model, + ckpt_gate_proj_name=ckpt_gate_proj_name, + ckpt_down_proj_name=ckpt_down_proj_name, + ckpt_up_proj_name=ckpt_up_proj_name, + num_experts=num_experts, + num_redundant_experts=num_redundant_experts, + ) diff --git a/vllm_omni/diffusion/models/hunyuan_image_3/hunyuan_image_3_transformer.py b/vllm_omni/diffusion/models/hunyuan_image_3/hunyuan_image_3_transformer.py index 0c2f9e290ac..a89931550de 100644 --- a/vllm_omni/diffusion/models/hunyuan_image_3/hunyuan_image_3_transformer.py +++ b/vllm_omni/diffusion/models/hunyuan_image_3/hunyuan_image_3_transformer.py @@ -32,10 +32,8 @@ from vllm.config import CacheConfig from vllm.distributed import ( get_tensor_model_parallel_world_size, - tensor_model_parallel_all_reduce, ) from vllm.model_executor.layers.activation import SiluAndMul -from vllm.model_executor.layers.fused_moe import SharedFusedMoE from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import ( MergedColumnParallelLinear, @@ -63,6 +61,7 @@ from vllm_omni.diffusion.distributed.parallel_state import get_pp_group from vllm_omni.diffusion.distributed.utils import get_local_device from vllm_omni.diffusion.layers.rope import RotaryEmbedding +from vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe import HunyuanFusedMoE logger = logging.getLogger(__name__) @@ -1417,7 +1416,7 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: final_hidden_states = final_hidden_states[0] + final_hidden_states[1] if self.tp_size > 1: - final_hidden_states = tensor_model_parallel_all_reduce(final_hidden_states) + final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel(final_hidden_states) return final_hidden_states.view(orig_shape) @@ -1565,22 +1564,6 @@ def forward( return output, None, past_key_value -class HunyuanFusedMoE(SharedFusedMoE): - def __init__(self, *, prefix: str = "", **kwargs): - super().__init__(prefix=prefix, **kwargs) - self._prefix = prefix - - self._init_hook_handle = self.register_forward_pre_hook(self._initialize_kernel_hook, with_kwargs=True) - - def _initialize_kernel_hook(self, module, args, kwargs): - if self.quant_method: - self.quant_method.process_weights_after_loading(self) - self._init_hook_handle.remove() - - def forward(self, hidden_states, router_logits): - return super().forward(hidden_states, router_logits) - - class HunyuanImage3DecoderLayer(nn.Module): def __init__(self, config: HunyuanImage3Config, layer_idx: int, prefix: str = ""): super().__init__() @@ -2454,7 +2437,6 @@ def __call__( callback_outputs = callback_on_step_end(self, i, t, callback_kwargs) latents = callback_outputs.pop("latents", latents) - # call the callback, if provided if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): progress_bar.update() diff --git a/vllm_omni/platforms/interface.py b/vllm_omni/platforms/interface.py index f90c69e3a53..314cb3219e5 100644 --- a/vllm_omni/platforms/interface.py +++ b/vllm_omni/platforms/interface.py @@ -2,6 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from enum import Enum +from typing import Any import torch from vllm.platforms import Platform @@ -52,6 +53,16 @@ def get_omni_generation_worker_cls(cls) -> str: def get_default_stage_config_path(cls) -> str: raise NotImplementedError + @classmethod + def get_diffusion_model_impl_qualname(cls, op_name: str) -> str: + if op_name == "hunyuan_fused_moe": + return "vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe.HunyuanFusedMoEDefault" + raise NotImplementedError(f"Unsupported diffusion model op: {op_name}") + + @classmethod + def prepare_diffusion_op_runtime(cls, op_name: str, **kwargs: Any) -> None: + return None + @classmethod def get_diffusion_attn_backend_cls( cls, diff --git a/vllm_omni/platforms/npu/models/__init__.py b/vllm_omni/platforms/npu/models/__init__.py new file mode 100644 index 00000000000..208f01a7cb5 --- /dev/null +++ b/vllm_omni/platforms/npu/models/__init__.py @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project diff --git a/vllm_omni/platforms/npu/models/hunyuan_fused_moe.py b/vllm_omni/platforms/npu/models/hunyuan_fused_moe.py new file mode 100644 index 00000000000..46c76a65290 --- /dev/null +++ b/vllm_omni/platforms/npu/models/hunyuan_fused_moe.py @@ -0,0 +1,108 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +from typing import Any + +import torch +import vllm.forward_context as _vllm_fc +from vllm.config import VllmConfig +from vllm.distributed import get_ep_group +from vllm.distributed.parallel_state import get_tensor_model_parallel_world_size +from vllm.distributed.parallel_state import ( + init_model_parallel_group as vllm_init_model_parallel_group, +) +from vllm_ascend.ascend_forward_context import MoECommType +from vllm_ascend.ops.fused_moe.fused_moe import AscendSharedFusedMoE +from vllm_ascend.ops.fused_moe.moe_comm_method import _MoECommMethods +from vllm_ascend.utils import AscendDeviceType, get_ascend_device_type + +from vllm_omni.diffusion.distributed.parallel_state import ( + get_data_parallel_world_size, + get_world_group, +) +from vllm_omni.diffusion.forward_context import get_forward_context as omni_get_ctx + + +def _init_mc2_group_for_diffusion( + world_size: int, + data_parallel_size: int, + tensor_parallel_size: int, + backend: str, + local_rank: int, +) -> None: + import vllm_ascend.distributed.parallel_state as vllm_ascend_parallel_state + + if getattr(vllm_ascend_parallel_state, "_MC2", None) is not None: + return + all_ranks = torch.arange(world_size).reshape(-1, data_parallel_size * tensor_parallel_size) + group_ranks = all_ranks.unbind(0) + group_ranks = [x.tolist() for x in group_ranks] + + vllm_ascend_parallel_state._MC2 = vllm_init_model_parallel_group( + group_ranks, + local_rank, + backend, + group_name="mc2", + ) + + +def _select_moe_comm_method(vllm_config: VllmConfig) -> MoECommType | None: + soc_version = get_ascend_device_type() + if not vllm_config.parallel_config.enable_expert_parallel or get_ep_group().world_size == 1: + moe_comm_type = MoECommType.ALLGATHER + elif soc_version in {AscendDeviceType.A2}: + moe_comm_type = MoECommType.ALLGATHER + elif soc_version in {AscendDeviceType.A3}: + moe_comm_type = MoECommType.ALLTOALL + elif soc_version in {AscendDeviceType._310P}: + moe_comm_type = MoECommType.ALLGATHER + elif soc_version in {AscendDeviceType.A5}: + moe_comm_type = MoECommType.ALLTOALL + else: + raise ValueError(f"Unsupported soc_version: {soc_version}") + return moe_comm_type + + +def prepare_hunyuan_fused_moe_runtime() -> None: + world_size = torch.distributed.get_world_size() + data_parallel_size = get_data_parallel_world_size() + tensor_parallel_size = get_tensor_model_parallel_world_size() + backend = torch.distributed.get_backend(get_world_group().device_group) + local_rank = get_world_group().local_rank + _init_mc2_group_for_diffusion( + world_size=world_size, + data_parallel_size=data_parallel_size, + tensor_parallel_size=tensor_parallel_size, + backend=backend, + local_rank=local_rank, + ) + + if not hasattr(_vllm_fc.ForwardContext, "moe_comm_method"): + _vllm_fc.ForwardContext.__annotations__["in_profile_run"] = bool + _vllm_fc.ForwardContext.in_profile_run = False + + _vllm_fc.ForwardContext.moe_comm_type = _select_moe_comm_method(vllm_config=omni_get_ctx().vllm_config) + _vllm_fc.ForwardContext.moe_comm_method = _MoECommMethods.get(_vllm_fc.ForwardContext.moe_comm_type) + _vllm_fc.ForwardContext.flash_comm_v1_enabled = False + + +class AscendHunyuanFusedMoE(AscendSharedFusedMoE): + def __init__(self, *, prefix: str = "", **kwargs: Any) -> None: + super().__init__(prefix=prefix, **kwargs) + self._prefix = prefix + self._init_hook_handle = self.register_forward_pre_hook(self._initialize_kernel_hook, with_kwargs=True) + + def _initialize_kernel_hook(self, module: Any, args: Any, kwargs: Any) -> None: + if self.quant_method: + self.quant_method.process_weights_after_loading(self) + self._init_hook_handle.remove() + + def forward(self, hidden_states: Any, router_logits: Any) -> Any: + return super().forward(hidden_states, router_logits) + + def __del__(self): + import vllm_ascend.distributed.parallel_state as vllm_ascend_parallel_state + + if vllm_ascend_parallel_state._MC2: + vllm_ascend_parallel_state._MC2.destroy() + vllm_ascend_parallel_state._MC2 = None diff --git a/vllm_omni/platforms/npu/platform.py b/vllm_omni/platforms/npu/platform.py index 3c2495c3d35..bda4e4f6155 100644 --- a/vllm_omni/platforms/npu/platform.py +++ b/vllm_omni/platforms/npu/platform.py @@ -1,6 +1,8 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from typing import Any + import torch from vllm.logger import init_logger from vllm_ascend.platform import NPUPlatform @@ -33,6 +35,23 @@ def get_omni_generation_worker_cls(cls) -> str: def get_default_stage_config_path(cls) -> str: return "vllm_omni/platforms/npu/stage_configs" + @classmethod + def get_diffusion_model_impl_qualname(cls, op_name: str) -> str: + if op_name == "hunyuan_fused_moe": + return "vllm_omni.platforms.npu.models.hunyuan_fused_moe.AscendHunyuanFusedMoE" + return super().get_diffusion_model_impl_qualname(op_name) + + @classmethod + def prepare_diffusion_op_runtime(cls, op_name: str, **kwargs: Any) -> None: + if op_name != "hunyuan_fused_moe": + return + + from vllm_omni.platforms.npu.models.hunyuan_fused_moe import ( + prepare_hunyuan_fused_moe_runtime, + ) + + prepare_hunyuan_fused_moe_runtime() + @classmethod def get_diffusion_attn_backend_cls( cls,