vllm-project · gcanlin · Mar 12, 2026 · Feb 4, 2026 · Mar 6, 2026 · Mar 12, 2026
@@ -65,6 +65,7 @@ th {
 |--------------|--------|-------------------|
 | `Qwen3OmniMoeForConditionalGeneration` | Qwen3-Omni | `Qwen/Qwen3-Omni-30B-A3B-Instruct` |
 | `Qwen2_5OmniForConditionalGeneration` | Qwen2.5-Omni | `Qwen/Qwen2.5-Omni-7B`, `Qwen/Qwen2.5-Omni-3B`|
+| `HunyuanImage3ForCausalMM` | HunyuanImage3.0 (DiT-only) | `tencent/HunyuanImage-3.0`, `tencent/HunyuanImage-3.0-Instruct` |
 | `QwenImagePipeline` | Qwen-Image | `Qwen/Qwen-Image` |
 | `QwenImagePipeline` | Qwen-Image-2512 | `Qwen/Qwen-Image-2512` |
 | `QwenImageEditPipeline` | Qwen-Image-Edit | `Qwen/Qwen-Image-Edit` |

@@ -0,0 +1,112 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Unit tests for HunyuanFusedMoE (Support HunyuanImage3 Diffusion Model, 5a779b4)."""
+
+import pytest
+
+pytestmark = [pytest.mark.core_model, pytest.mark.cpu]
+
+
+class TestHunyuanFusedMoEPlatformDispatch:
+    """Test platform dispatch via platform qualname hooks."""
+
+    def test_default_platform_uses_default_impl_qualname(self, mocker):
+        """HunyuanFusedMoE should resolve the impl class from the platform hook."""
+        import vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe as hunyuan_moe
+
+        # Stand-in platform whose qualname hook reports the default implementation.
+        mock_platform = mocker.MagicMock()
+        mock_platform.get_diffusion_model_impl_qualname.return_value = (
+            "vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe.HunyuanFusedMoEDefault"
+        )
+
+        # Patch the names as bound inside the module under test, since the factory
+        # looks up `current_omni_platform` and `resolve_obj_by_qualname` there.
+        mocker.patch.object(
+            hunyuan_moe,
+            "current_omni_platform",
+            mock_platform,
+        )
+        mock_resolve = mocker.patch.object(hunyuan_moe, "resolve_obj_by_qualname")
+        # The resolver hands back a mock "class", so no real layer is constructed.
+        mock_impl = mocker.MagicMock()
+        mock_resolve.return_value = mock_impl
+
+        from vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe import (
+            HunyuanFusedMoE,
+        )
+
+        HunyuanFusedMoE(prefix="")
+
+        # Both platform hooks receive the op name, the resolver receives the qualname
+        # returned by the platform, and the resolved class is called with the prefix.
+        mock_platform.prepare_diffusion_op_runtime.assert_called_once_with("hunyuan_fused_moe")
+        mock_platform.get_diffusion_model_impl_qualname.assert_called_once_with("hunyuan_fused_moe")
+        mock_resolve.assert_called_once_with(
+            "vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe.HunyuanFusedMoEDefault"
+        )
+        mock_impl.assert_called_once_with(prefix="")
+
+
+class TestHunyuanFusedMoEFactory:
+    """Test HunyuanFusedMoE factory __new__ and make_expert_params_mapping delegation."""
+
+    def test_new_delegates_to_impl_class(self, mocker):
+        """HunyuanFusedMoE(prefix=..., **kwargs) should instantiate and return impl instance."""
+        import vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe as hunyuan_moe
+
+        # Minimal implementation stand-in that records what it was constructed with.
+        class MockImpl:
+            def __init__(self, *, prefix: str = "", **kwargs):
+                self.prefix = prefix
+                self.kwargs = kwargs
+
+        mock_platform = mocker.MagicMock()
+        mock_platform.get_diffusion_model_impl_qualname.return_value = "mock.impl.Qualname"
+        mocker.patch.object(hunyuan_moe, "current_omni_platform", mock_platform)
+
+        # NOTE(review): the MockImpl instance is built here, ahead of the factory call,
+        # so the `prefix`/`kwargs` assertions below read values set on this line; the
+        # forwarding itself is verified by `mock_impl_class.assert_called_once_with`.
+        mock_impl_class = mocker.MagicMock(return_value=MockImpl(prefix="test", a=1))
+        mocker.patch.object(hunyuan_moe, "resolve_obj_by_qualname", return_value=mock_impl_class)
+
+        from vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe import (
+            HunyuanFusedMoE,
+        )
+
+        result = HunyuanFusedMoE(prefix="test", a=1)
+
+        # The factory returns whatever the resolved class produced, not a HunyuanFusedMoE.
+        assert isinstance(result, MockImpl)
+        assert result.prefix == "test"
+        assert result.kwargs == {"a": 1}
+        mock_platform.prepare_diffusion_op_runtime.assert_called_once_with("hunyuan_fused_moe")
+        mock_platform.get_diffusion_model_impl_qualname.assert_called_once_with("hunyuan_fused_moe")
+        mock_impl_class.assert_called_once_with(prefix="test", a=1)
+
+    def test_make_expert_params_mapping_delegates_to_impl(self, mocker):
+        """make_expert_params_mapping should delegate to impl class method."""
+        import vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe as hunyuan_moe
+
+        # Sentinel mapping; the test only checks that it is passed through unchanged.
+        expected_mapping = [("a", "b", 0, "c")]
+        mock_platform = mocker.MagicMock()
+        mock_platform.get_diffusion_model_impl_qualname.return_value = "mock.impl.Qualname"
+        mocker.patch.object(hunyuan_moe, "current_omni_platform", mock_platform)
+
+        mock_impl_class = mocker.MagicMock()
+        mock_impl_class.make_expert_params_mapping = mocker.MagicMock(return_value=expected_mapping)
+        mocker.patch.object(hunyuan_moe, "resolve_obj_by_qualname", return_value=mock_impl_class)
+
+        from vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe import (
+            HunyuanFusedMoE,
+        )
+
+        result = HunyuanFusedMoE.make_expert_params_mapping(
+            model=None,
+            ckpt_gate_proj_name="gate",
+            ckpt_down_proj_name="down",
+            ckpt_up_proj_name="up",
+            num_experts=4,
+            num_redundant_experts=0,
+        )
+
+        assert result == expected_mapping
+        mock_platform.get_diffusion_model_impl_qualname.assert_called_once_with("hunyuan_fused_moe")
+        # `model` is expected positionally and the remaining arguments by keyword,
+        # matching how the wrapper forwards them to the implementation.
+        mock_impl_class.make_expert_params_mapping.assert_called_once_with(
+            None,
+            ckpt_gate_proj_name="gate",
+            ckpt_down_proj_name="down",
+            ckpt_up_proj_name="up",
+            num_experts=4,
+            num_redundant_experts=0,
+        )
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Unit tests for OmniDiffusionConfig.is_moe (fix is_moe type and threshold, 6663c0b)."""
+
+import pytest
+
+from vllm_omni.diffusion.data import OmniDiffusionConfig, TransformerConfig
+
+pytestmark = [pytest.mark.core_model, pytest.mark.cpu]
+
+
+class TestOmniDiffusionConfigIsMoE:
+    """Tests for OmniDiffusionConfig.is_moe property.
+
+    Covers commit 6663c0b: fix is_moe type and threshold
+    - num_experts must be (list, tuple, int); otherwise return False.
+    - Threshold: is_moe is True when num_experts > 0 (not > 1).
+    - For a list/tuple, is_moe is True when any int entry is > 0; entries that
+      are not ints (e.g. "a", 0.0) are ignored.
+
+    Every case builds the config the same way: a TransformerConfig created via
+    ``from_dict`` and passed as ``tf_model_config`` with ``model="test"``.
+    """
+
+    def test_is_moe_missing_num_experts_returns_false(self):
+        """When num_experts is absent, is_moe should be False."""
+        tf_config = TransformerConfig.from_dict({})
+        config = OmniDiffusionConfig(model="test", tf_model_config=tf_config)
+        assert config.is_moe is False
+
+    def test_is_moe_none_num_experts_returns_false(self):
+        """When num_experts is explicitly None (e.g. in params), is_moe should be False."""
+        tf_config = TransformerConfig.from_dict({"num_experts": None})
+        config = OmniDiffusionConfig(model="test", tf_model_config=tf_config)
+        assert config.is_moe is False
+
+    def test_is_moe_non_allowed_type_returns_false(self):
+        """When num_experts is not int/list/tuple (e.g. str), is_moe should be False."""
+        tf_config = TransformerConfig.from_dict({"num_experts": "2"})
+        config = OmniDiffusionConfig(model="test", tf_model_config=tf_config)
+        assert config.is_moe is False
+
+    def test_is_moe_int_zero_returns_false(self):
+        """num_experts int 0 should yield is_moe False (threshold > 0)."""
+        tf_config = TransformerConfig.from_dict({"num_experts": 0})
+        config = OmniDiffusionConfig(model="test", tf_model_config=tf_config)
+        assert config.is_moe is False
+
+    def test_is_moe_int_one_returns_true(self):
+        """num_experts int 1 should yield is_moe True (threshold > 0, not > 1)."""
+        tf_config = TransformerConfig.from_dict({"num_experts": 1})
+        config = OmniDiffusionConfig(model="test", tf_model_config=tf_config)
+        assert config.is_moe is True
+
+    def test_is_moe_int_gt_one_returns_true(self):
+        """num_experts int > 1 should yield is_moe True."""
+        tf_config = TransformerConfig.from_dict({"num_experts": 2})
+        config = OmniDiffusionConfig(model="test", tf_model_config=tf_config)
+        assert config.is_moe is True
+
+    def test_is_moe_list_all_zero_returns_false(self):
+        """num_experts list with all <= 0 should yield is_moe False."""
+        tf_config = TransformerConfig.from_dict({"num_experts": [0]})
+        config = OmniDiffusionConfig(model="test", tf_model_config=tf_config)
+        assert config.is_moe is False
+
+    def test_is_moe_list_has_positive_returns_true(self):
+        """num_experts list with any int > 0 should yield is_moe True."""
+        tf_config = TransformerConfig.from_dict({"num_experts": [0, 1]})
+        config = OmniDiffusionConfig(model="test", tf_model_config=tf_config)
+        assert config.is_moe is True
+
+    def test_is_moe_tuple_has_positive_returns_true(self):
+        """num_experts tuple with any int > 0 should yield is_moe True."""
+        tf_config = TransformerConfig.from_dict({"num_experts": (0, 2)})
+        config = OmniDiffusionConfig(model="test", tf_model_config=tf_config)
+        assert config.is_moe is True
+
+    def test_is_moe_list_non_int_ignored(self):
+        """num_experts list with only non-int entries should yield is_moe False."""
+        tf_config = TransformerConfig.from_dict({"num_experts": ["a", 0.0]})
+        config = OmniDiffusionConfig(model="test", tf_model_config=tf_config)
+        assert config.is_moe is False
@@ -463,11 +463,13 @@ class OmniDiffusionConfig:
     @property
     def is_moe(self) -> bool:
+        """Return whether ``tf_model_config`` declares at least one expert.
+
+        ``num_experts`` is read with a default of ``None``. An int counts as MoE
+        when it is > 0; a list or tuple counts when any int element is > 0. A
+        missing value, ``None`` or any other type yields ``False``.
+        """
         num_experts = self.tf_model_config.get("num_experts", None)
+        # Reject anything that is not an int, list or tuple up front.
+        # NOTE(review): bool is a subclass of int, so True passes this guard and the
+        # int branch below returns True for it -- confirm whether that is intended.
+        if not isinstance(num_experts, (list, tuple, int)):
+            return False
         if isinstance(num_experts, int):
-            return num_experts > 1
+            return num_experts > 0
 
         if isinstance(num_experts, (list, tuple)):
-            return any(isinstance(n, int) and n > 1 for n in num_experts)
+            return any(isinstance(n, int) and n > 0 for n in num_experts)
 
+        # NOTE(review): with the type guard above, every accepted type has already
+        # returned, so this trailing return is no longer reachable.
         return False
 

@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """Hunyuan Image 3 diffusion model components."""
 
+from vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe import HunyuanFusedMoE
 from vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_image_3_transformer import (
     HunyuanImage3Model,
     HunyuanImage3Text2ImagePipeline,
@@ -10,8 +11,4 @@
     HunyuanImage3Pipeline,
 )
 
-__all__ = [
-    "HunyuanImage3Pipeline",
-    "HunyuanImage3Model",
-    "HunyuanImage3Text2ImagePipeline",
-]
+__all__ = ["HunyuanImage3Pipeline", "HunyuanImage3Model", "HunyuanImage3Text2ImagePipeline", "HunyuanFusedMoE"]
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from typing import Any
+
+from vllm.model_executor.layers.fused_moe import SharedFusedMoE
+from vllm.utils.import_utils import resolve_obj_by_qualname
+
+from vllm_omni.platforms import current_omni_platform
+
+
+class HunyuanFusedMoEDefault(SharedFusedMoE):
+    """Default fused-MoE layer for HunyuanImage3, built on ``SharedFusedMoE``.
+
+    Construction registers a forward pre-hook that runs the quantization method's
+    ``process_weights_after_loading`` (when a quant method is set) and then removes
+    itself, so the post-load weight processing is deferred until the layer is
+    first called rather than done in ``__init__``.
+    """
+
+    def __init__(self, *, prefix: str = "", **kwargs: Any) -> None:
+        """Build the layer and arm the one-shot kernel-initialization hook.
+
+        Args:
+            prefix: Forwarded to ``SharedFusedMoE`` and also kept on ``self._prefix``.
+            **kwargs: Passed through unchanged to ``SharedFusedMoE.__init__``.
+        """
+        super().__init__(prefix=prefix, **kwargs)
+        self._prefix = prefix
+        # Keep the handle so the hook can unregister itself after its first run.
+        self._init_hook_handle = self.register_forward_pre_hook(self._initialize_kernel_hook, with_kwargs=True)
+
+    def _initialize_kernel_hook(self, module: Any, args: Any, kwargs: Any) -> None:
+        """Forward pre-hook: process weights once, then unregister.
+
+        ``module``, ``args`` and ``kwargs`` are accepted to satisfy the hook
+        signature and are not used. Nothing is returned, so the forward inputs are
+        presumably left untouched -- TODO confirm against the hook contract.
+        """
+        if self.quant_method:
+            self.quant_method.process_weights_after_loading(self)
+        # Remove unconditionally, including when there is no quant method.
+        self._init_hook_handle.remove()
+
+    def forward(self, hidden_states: Any, router_logits: Any) -> Any:
+        """Delegate directly to ``SharedFusedMoE.forward`` with the same arguments."""
+        return super().forward(hidden_states, router_logits)
+
+
+class HunyuanFusedMoE:
+    """Factory facade that dispatches to the platform's fused-MoE implementation.
+
+    The implementation class is looked up at call time through
+    ``current_omni_platform.get_diffusion_model_impl_qualname("hunyuan_fused_moe")``
+    and imported with ``resolve_obj_by_qualname``.
+    """
+
+    def __new__(cls, *, prefix: str = "", **kwargs: Any) -> Any:
+        """Create and return an instance of the platform-selected implementation.
+
+        Args:
+            prefix: Forwarded to the implementation constructor.
+            **kwargs: Forwarded unchanged to the implementation constructor.
+
+        Returns:
+            The object produced by the resolved implementation class. With the
+            default implementation this is a ``HunyuanFusedMoEDefault``, which is
+            not a subclass of ``HunyuanFusedMoE``.
+        """
+        op_name = "hunyuan_fused_moe"
+        # Give the platform a chance to set up its runtime before the class is resolved.
+        current_omni_platform.prepare_diffusion_op_runtime(op_name)
+        impl = resolve_obj_by_qualname(
+            current_omni_platform.get_diffusion_model_impl_qualname(op_name),
+        )
+        return impl(prefix=prefix, **kwargs)
+
+    @classmethod
+    def make_expert_params_mapping(
+        cls,
+        model: Any,
+        ckpt_gate_proj_name: str,
+        ckpt_down_proj_name: str,
+        ckpt_up_proj_name: str,
+        num_experts: int,
+        num_redundant_experts: int = 0,
+    ) -> list[tuple[str, str, int, str]]:
+        """Delegate to the resolved implementation's ``make_expert_params_mapping``.
+
+        ``model`` is passed positionally and the remaining arguments by keyword.
+        Unlike ``__new__``, this path does not call ``prepare_diffusion_op_runtime``.
+
+        Returns:
+            Whatever the implementation's ``make_expert_params_mapping`` returns.
+        """
+        impl = resolve_obj_by_qualname(
+            current_omni_platform.get_diffusion_model_impl_qualname("hunyuan_fused_moe"),
+        )
+        return impl.make_expert_params_mapping(
+            model,
+            ckpt_gate_proj_name=ckpt_gate_proj_name,
+            ckpt_down_proj_name=ckpt_down_proj_name,
+            ckpt_up_proj_name=ckpt_up_proj_name,
+            num_experts=num_experts,
+            num_redundant_experts=num_redundant_experts,
+        )
@@ -32,10 +32,8 @@
 from vllm.config import CacheConfig
 from vllm.distributed import (
     get_tensor_model_parallel_world_size,
-    tensor_model_parallel_all_reduce,
 )
 from vllm.model_executor.layers.activation import SiluAndMul
-from vllm.model_executor.layers.fused_moe import SharedFusedMoE
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     MergedColumnParallelLinear,
@@ -63,6 +61,7 @@
 from vllm_omni.diffusion.distributed.parallel_state import get_pp_group
 from vllm_omni.diffusion.distributed.utils import get_local_device
 from vllm_omni.diffusion.layers.rope import RotaryEmbedding
+from vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe import HunyuanFusedMoE
 
 logger = logging.getLogger(__name__)
 
@@ -1417,7 +1416,7 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
             final_hidden_states = final_hidden_states[0] + final_hidden_states[1]
 
         if self.tp_size > 1:
-            final_hidden_states = tensor_model_parallel_all_reduce(final_hidden_states)
+            final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel(final_hidden_states)
 
         return final_hidden_states.view(orig_shape)
 
@@ -1565,22 +1564,6 @@ def forward(
         return output, None, past_key_value
 
 
-class HunyuanFusedMoE(SharedFusedMoE):
-    def __init__(self, *, prefix: str = "", **kwargs):
-        super().__init__(prefix=prefix, **kwargs)
-        self._prefix = prefix
-
-        self._init_hook_handle = self.register_forward_pre_hook(self._initialize_kernel_hook, with_kwargs=True)
-
-    def _initialize_kernel_hook(self, module, args, kwargs):
-        if self.quant_method:
-            self.quant_method.process_weights_after_loading(self)
-        self._init_hook_handle.remove()
-
-    def forward(self, hidden_states, router_logits):
-        return super().forward(hidden_states, router_logits)
-
-
 class HunyuanImage3DecoderLayer(nn.Module):
     def __init__(self, config: HunyuanImage3Config, layer_idx: int, prefix: str = ""):
         super().__init__()
@@ -2454,7 +2437,6 @@ def __call__(
                     callback_outputs = callback_on_step_end(self, i, t, callback_kwargs)
 
                     latents = callback_outputs.pop("latents", latents)
-
                 # call the callback, if provided
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
                     progress_bar.update()

@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 from enum import Enum
+from typing import Any
 
 import torch
 from vllm.platforms import Platform
@@ -52,6 +53,16 @@ def get_omni_generation_worker_cls(cls) -> str:
     def get_default_stage_config_path(cls) -> str:
         raise NotImplementedError
 
+    @classmethod
+    def get_diffusion_model_impl_qualname(cls, op_name: str) -> str:
+        """Return the fully qualified class name implementing a diffusion model op.
+
+        Only ``"hunyuan_fused_moe"`` is handled here and maps to the default
+        implementation. Presumably platform subclasses override this to supply
+        their own classes -- confirm against the platform implementations.
+
+        Raises:
+            NotImplementedError: If ``op_name`` is not a supported op.
+        """
+        if op_name == "hunyuan_fused_moe":
+            return "vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe.HunyuanFusedMoEDefault"
+        raise NotImplementedError(f"Unsupported diffusion model op: {op_name}")
+
+    @classmethod
+    def prepare_diffusion_op_runtime(cls, op_name: str, **kwargs: Any) -> None:
+        """Hook called before a diffusion op implementation is resolved.
+
+        This base version ignores ``op_name`` and ``kwargs`` and does nothing.
+        Presumably an extension point for platforms that need setup work --
+        confirm against the platform subclasses.
+        """
+        return None
+
     @classmethod
     def get_diffusion_attn_backend_cls(
         cls,

@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# SPDX-License-Identifier: Apache-2.0
		# SPDX-FileCopyrightText: Copyright contributors to the vLLM project