Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/models/supported_models.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ th {
|--------------|--------|-------------------|
| `Qwen3OmniMoeForConditionalGeneration` | Qwen3-Omni | `Qwen/Qwen3-Omni-30B-A3B-Instruct` |
| `Qwen2_5OmniForConditionalGeneration` | Qwen2.5-Omni | `Qwen/Qwen2.5-Omni-7B`, `Qwen/Qwen2.5-Omni-3B`|
| `HunyuanImage3ForCausalMM` | HunyuanImage3.0 (DiT-only) | `tencent/HunyuanImage-3.0`, `tencent/HunyuanImage-3.0-Instruct` |
| `QwenImagePipeline` | Qwen-Image | `Qwen/Qwen-Image` |
| `QwenImagePipeline` | Qwen-Image-2512 | `Qwen/Qwen-Image-2512` |
| `QwenImageEditPipeline` | Qwen-Image-Edit | `Qwen/Qwen-Image-Edit` |
Expand Down
112 changes: 112 additions & 0 deletions tests/diffusion/models/hunyuan_image_3/test_hunyuan_fused_moe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Unit tests for HunyuanFusedMoE (Support HunyuanImage3 Diffusion Model, 5a779b4)."""

import pytest

pytestmark = [pytest.mark.core_model, pytest.mark.cpu]


class TestHunyuanFusedMoEPlatformDispatch:
    """Check that HunyuanFusedMoE selects its implementation through the platform hooks."""

    def test_default_platform_uses_default_impl_qualname(self, mocker):
        """The factory must resolve and instantiate the class named by the platform hook."""
        import vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe as hunyuan_moe

        default_qualname = "vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe.HunyuanFusedMoEDefault"

        # Replace the platform singleton so the qualname hook is fully controlled.
        platform = mocker.MagicMock()
        platform.get_diffusion_model_impl_qualname.return_value = default_qualname
        mocker.patch.object(hunyuan_moe, "current_omni_platform", platform)

        # Stub the resolver so no real implementation class is ever constructed.
        impl_cls = mocker.MagicMock()
        resolver = mocker.patch.object(hunyuan_moe, "resolve_obj_by_qualname", return_value=impl_cls)

        hunyuan_moe.HunyuanFusedMoE(prefix="")

        platform.prepare_diffusion_op_runtime.assert_called_once_with("hunyuan_fused_moe")
        platform.get_diffusion_model_impl_qualname.assert_called_once_with("hunyuan_fused_moe")
        resolver.assert_called_once_with(default_qualname)
        impl_cls.assert_called_once_with(prefix="")


class TestHunyuanFusedMoEFactory:
    """Exercise the HunyuanFusedMoE factory: ``__new__`` and ``make_expert_params_mapping``."""

    def test_new_delegates_to_impl_class(self, mocker):
        """HunyuanFusedMoE(prefix=..., **kwargs) returns the object built by the resolved impl class."""
        import vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe as hunyuan_moe

        class MockImpl:
            def __init__(self, *, prefix: str = "", **kwargs):
                self.prefix = prefix
                self.kwargs = kwargs

        platform = mocker.MagicMock()
        platform.get_diffusion_model_impl_qualname.return_value = "mock.impl.Qualname"
        mocker.patch.object(hunyuan_moe, "current_omni_platform", platform)

        # The resolved "class" is a mock whose call yields a pre-built MockImpl.
        prebuilt = MockImpl(prefix="test", a=1)
        impl_cls = mocker.MagicMock(return_value=prebuilt)
        mocker.patch.object(hunyuan_moe, "resolve_obj_by_qualname", return_value=impl_cls)

        built = hunyuan_moe.HunyuanFusedMoE(prefix="test", a=1)

        assert isinstance(built, MockImpl)
        assert built.prefix == "test"
        assert built.kwargs == {"a": 1}
        platform.prepare_diffusion_op_runtime.assert_called_once_with("hunyuan_fused_moe")
        platform.get_diffusion_model_impl_qualname.assert_called_once_with("hunyuan_fused_moe")
        impl_cls.assert_called_once_with(prefix="test", a=1)

    def test_make_expert_params_mapping_delegates_to_impl(self, mocker):
        """make_expert_params_mapping forwards its arguments to the resolved impl class."""
        import vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe as hunyuan_moe

        expected_mapping = [("a", "b", 0, "c")]

        platform = mocker.MagicMock()
        platform.get_diffusion_model_impl_qualname.return_value = "mock.impl.Qualname"
        mocker.patch.object(hunyuan_moe, "current_omni_platform", platform)

        impl_cls = mocker.MagicMock()
        impl_cls.make_expert_params_mapping.return_value = expected_mapping
        mocker.patch.object(hunyuan_moe, "resolve_obj_by_qualname", return_value=impl_cls)

        # Keyword arguments shared by the call under test and the expected delegation.
        mapping_kwargs = {
            "ckpt_gate_proj_name": "gate",
            "ckpt_down_proj_name": "down",
            "ckpt_up_proj_name": "up",
            "num_experts": 4,
            "num_redundant_experts": 0,
        }

        result = hunyuan_moe.HunyuanFusedMoE.make_expert_params_mapping(model=None, **mapping_kwargs)

        assert result == expected_mapping
        platform.get_diffusion_model_impl_qualname.assert_called_once_with("hunyuan_fused_moe")
        # The factory passes ``model`` positionally and everything else by keyword.
        impl_cls.make_expert_params_mapping.assert_called_once_with(None, **mapping_kwargs)
78 changes: 78 additions & 0 deletions tests/diffusion/test_data_is_moe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Unit tests for OmniDiffusionConfig.is_moe (fix is_moe type and threshold, 6663c0b)."""

import pytest

from vllm_omni.diffusion.data import OmniDiffusionConfig, TransformerConfig

pytestmark = [pytest.mark.core_model, pytest.mark.cpu]


class TestOmniDiffusionConfigIsMoE:
    """Tests for the ``OmniDiffusionConfig.is_moe`` property.

    Covers commit 6663c0b (fix is_moe type and threshold):
    - ``num_experts`` must be a list, tuple or int; anything else gives False.
    - The threshold is ``num_experts > 0`` (not ``> 1``).
    """

    @staticmethod
    def _is_moe(params):
        """Build a config whose transformer params are ``params`` and return its ``is_moe``."""
        transformer_cfg = TransformerConfig.from_dict(params)
        return OmniDiffusionConfig(model="test", tf_model_config=transformer_cfg).is_moe

    def test_is_moe_missing_num_experts_returns_false(self):
        """An absent num_experts key gives is_moe False."""
        assert self._is_moe({}) is False

    def test_is_moe_none_num_experts_returns_false(self):
        """An explicit num_experts=None gives is_moe False."""
        assert self._is_moe({"num_experts": None}) is False

    def test_is_moe_non_allowed_type_returns_false(self):
        """A num_experts that is not int/list/tuple (here a str) gives is_moe False."""
        assert self._is_moe({"num_experts": "2"}) is False

    def test_is_moe_int_zero_returns_false(self):
        """Integer 0 is not above the > 0 threshold."""
        assert self._is_moe({"num_experts": 0}) is False

    def test_is_moe_int_one_returns_true(self):
        """Integer 1 passes the threshold (> 0, not > 1)."""
        assert self._is_moe({"num_experts": 1}) is True

    def test_is_moe_int_gt_one_returns_true(self):
        """Integers greater than 1 give is_moe True."""
        assert self._is_moe({"num_experts": 2}) is True

    def test_is_moe_list_all_zero_returns_false(self):
        """A list whose entries are all <= 0 gives is_moe False."""
        assert self._is_moe({"num_experts": [0]}) is False

    def test_is_moe_list_has_positive_returns_true(self):
        """A list containing any int > 0 gives is_moe True."""
        assert self._is_moe({"num_experts": [0, 1]}) is True

    def test_is_moe_tuple_has_positive_returns_true(self):
        """A tuple containing any int > 0 gives is_moe True."""
        assert self._is_moe({"num_experts": (0, 2)}) is True

    def test_is_moe_list_non_int_ignored(self):
        """A list holding only non-int entries gives is_moe False."""
        assert self._is_moe({"num_experts": ["a", 0.0]}) is False
6 changes: 4 additions & 2 deletions vllm_omni/diffusion/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,11 +463,13 @@ class OmniDiffusionConfig:
@property
def is_moe(self) -> bool:
    """Return True when the transformer config declares at least one expert.

    ``num_experts`` is read from ``self.tf_model_config``:

    - an ``int`` counts as MoE when it is greater than 0;
    - a ``list``/``tuple`` counts as MoE when any ``int`` entry is greater
      than 0 (non-int entries are ignored);
    - a missing key, ``None`` or any other type is not MoE.
    """
    num_experts = self.tf_model_config.get("num_experts", None)

    if isinstance(num_experts, int):
        # NOTE(review): bool is a subclass of int, so True is treated as 1 here.
        return num_experts > 0

    if isinstance(num_experts, (list, tuple)):
        return any(isinstance(n, int) and n > 0 for n in num_experts)

    # Every remaining type (None, str, float, ...) is not an expert count.
    return False

Expand Down
7 changes: 2 additions & 5 deletions vllm_omni/diffusion/models/hunyuan_image_3/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Hunyuan Image 3 diffusion model components."""

from vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe import HunyuanFusedMoE
from vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_image_3_transformer import (
HunyuanImage3Model,
HunyuanImage3Text2ImagePipeline,
Expand All @@ -10,8 +11,4 @@
HunyuanImage3Pipeline,
)

__all__ = [
"HunyuanImage3Pipeline",
"HunyuanImage3Model",
"HunyuanImage3Text2ImagePipeline",
]
__all__ = ["HunyuanImage3Pipeline", "HunyuanImage3Model", "HunyuanImage3Text2ImagePipeline", "HunyuanFusedMoE"]
56 changes: 56 additions & 0 deletions vllm_omni/diffusion/models/hunyuan_image_3/hunyuan_fused_moe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from typing import Any

from vllm.model_executor.layers.fused_moe import SharedFusedMoE
from vllm.utils.import_utils import resolve_obj_by_qualname

from vllm_omni.platforms import current_omni_platform


class HunyuanFusedMoEDefault(SharedFusedMoE):
    """Default fused-MoE layer: ``SharedFusedMoE`` plus a one-shot pre-forward hook.

    The hook calls the quantization method's ``process_weights_after_loading``
    on this layer (when a quant method is set) and then unregisters itself.
    """

    def __init__(self, *, prefix: str = "", **kwargs: Any) -> None:
        """Build the underlying ``SharedFusedMoE`` and register the one-shot hook."""
        super().__init__(prefix=prefix, **kwargs)
        self._prefix = prefix
        # Keep the handle so the hook can remove itself after it has fired.
        self._init_hook_handle = self.register_forward_pre_hook(
            self._initialize_kernel_hook,
            with_kwargs=True,
        )

    def _initialize_kernel_hook(self, module: Any, args: Any, kwargs: Any) -> None:
        """Post-process the weights if a quant method exists, then detach the hook."""
        quant_method = self.quant_method
        if quant_method:
            quant_method.process_weights_after_loading(self)
        self._init_hook_handle.remove()

    def forward(self, hidden_states: Any, router_logits: Any) -> Any:
        """Delegate to ``SharedFusedMoE.forward`` with the same two arguments."""
        return super().forward(hidden_states, router_logits)


class HunyuanFusedMoE:
    """Factory that builds the platform-selected fused-MoE implementation.

    Instantiating this class does not create a ``HunyuanFusedMoE`` object: it
    asks ``current_omni_platform`` for the implementation qualname, resolves
    it, and returns an instance of that implementation class.
    """

    def __new__(cls, *, prefix: str = "", **kwargs: Any) -> Any:
        """Prepare the op runtime, resolve the implementation class and instantiate it."""
        op_name = "hunyuan_fused_moe"
        # Platform preparation runs before the implementation is looked up.
        current_omni_platform.prepare_diffusion_op_runtime(op_name)
        impl_qualname = current_omni_platform.get_diffusion_model_impl_qualname(op_name)
        impl_cls = resolve_obj_by_qualname(impl_qualname)
        return impl_cls(prefix=prefix, **kwargs)

    @classmethod
    def make_expert_params_mapping(
        cls,
        model: Any,
        ckpt_gate_proj_name: str,
        ckpt_down_proj_name: str,
        ckpt_up_proj_name: str,
        num_experts: int,
        num_redundant_experts: int = 0,
    ) -> list[tuple[str, str, int, str]]:
        """Forward to ``make_expert_params_mapping`` on the platform-selected implementation.

        ``model`` is passed positionally and the remaining arguments by keyword.
        """
        impl_qualname = current_omni_platform.get_diffusion_model_impl_qualname("hunyuan_fused_moe")
        impl_cls = resolve_obj_by_qualname(impl_qualname)
        return impl_cls.make_expert_params_mapping(
            model,
            ckpt_gate_proj_name=ckpt_gate_proj_name,
            ckpt_down_proj_name=ckpt_down_proj_name,
            ckpt_up_proj_name=ckpt_up_proj_name,
            num_experts=num_experts,
            num_redundant_experts=num_redundant_experts,
        )
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,8 @@
from vllm.config import CacheConfig
from vllm.distributed import (
get_tensor_model_parallel_world_size,
tensor_model_parallel_all_reduce,
)
from vllm.model_executor.layers.activation import SiluAndMul
from vllm.model_executor.layers.fused_moe import SharedFusedMoE
from vllm.model_executor.layers.layernorm import RMSNorm
from vllm.model_executor.layers.linear import (
MergedColumnParallelLinear,
Expand Down Expand Up @@ -63,6 +61,7 @@
from vllm_omni.diffusion.distributed.parallel_state import get_pp_group
from vllm_omni.diffusion.distributed.utils import get_local_device
from vllm_omni.diffusion.layers.rope import RotaryEmbedding
from vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe import HunyuanFusedMoE

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -1417,7 +1416,7 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
final_hidden_states = final_hidden_states[0] + final_hidden_states[1]

if self.tp_size > 1:
final_hidden_states = tensor_model_parallel_all_reduce(final_hidden_states)
final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel(final_hidden_states)

return final_hidden_states.view(orig_shape)

Expand Down Expand Up @@ -1565,22 +1564,6 @@ def forward(
return output, None, past_key_value


class HunyuanFusedMoE(SharedFusedMoE):
    """``SharedFusedMoE`` with a one-shot pre-forward hook for weight post-processing."""

    def __init__(self, *, prefix: str = "", **kwargs):
        """Build the underlying ``SharedFusedMoE`` and register the one-shot hook."""
        super().__init__(prefix=prefix, **kwargs)
        self._prefix = prefix

        # Keep the handle so the hook can remove itself after it has fired.
        self._init_hook_handle = self.register_forward_pre_hook(
            self._initialize_kernel_hook,
            with_kwargs=True,
        )

    def _initialize_kernel_hook(self, module, args, kwargs):
        """Post-process the weights if a quant method exists, then detach the hook."""
        quant_method = self.quant_method
        if quant_method:
            quant_method.process_weights_after_loading(self)
        self._init_hook_handle.remove()

    def forward(self, hidden_states, router_logits):
        """Delegate to ``SharedFusedMoE.forward`` with the same two arguments."""
        return super().forward(hidden_states, router_logits)


class HunyuanImage3DecoderLayer(nn.Module):
def __init__(self, config: HunyuanImage3Config, layer_idx: int, prefix: str = ""):
super().__init__()
Expand Down Expand Up @@ -2454,7 +2437,6 @@ def __call__(
callback_outputs = callback_on_step_end(self, i, t, callback_kwargs)

latents = callback_outputs.pop("latents", latents)

# call the callback, if provided
if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
progress_bar.update()
Expand Down
11 changes: 11 additions & 0 deletions vllm_omni/platforms/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from enum import Enum
from typing import Any

import torch
from vllm.platforms import Platform
Expand Down Expand Up @@ -52,6 +53,16 @@ def get_omni_generation_worker_cls(cls) -> str:
def get_default_stage_config_path(cls) -> str:
raise NotImplementedError

@classmethod
def get_diffusion_model_impl_qualname(cls, op_name: str) -> str:
    """Return the fully qualified name of the implementation class for ``op_name``.

    Only ``"hunyuan_fused_moe"`` is handled here.

    Raises:
        NotImplementedError: If ``op_name`` is any other value.
    """
    if op_name != "hunyuan_fused_moe":
        raise NotImplementedError(f"Unsupported diffusion model op: {op_name}")
    return "vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe.HunyuanFusedMoEDefault"

@classmethod
def prepare_diffusion_op_runtime(cls, op_name: str, **kwargs: Any) -> None:
    """Hook invoked before a diffusion op implementation is resolved.

    This implementation does nothing and returns ``None``.
    """
    return None

@classmethod
def get_diffusion_attn_backend_cls(
cls,
Expand Down
2 changes: 2 additions & 0 deletions vllm_omni/platforms/npu/models/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
Loading
Loading