-
Notifications
You must be signed in to change notification settings - Fork 1k
[Model] Extend NPU support for HunyuanImage3 Diffusion Model #1689
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
gcanlin
merged 6 commits into
vllm-project:main
from
Semmer2:HunyuanImage3_npu_0.16.0_and_ep
Mar 12, 2026
Merged
Changes from all commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
c72fefe
[Model] Support HunyuanImage3 Diffusion Model for GPU and NPU
ElleElleWu bac1937
fix is_moe type and threshold, add UT for is_moe, Hunyuan_fused_moe
ElleElleWu 6603fc7
fix hunyuan_fused_moe for xpu and other devices
ElleElleWu 79d77c9
Refactor hardware dispatch
gcanlin 022e407
rename
gcanlin c526392
Merge branch 'main' into HunyuanImage3_npu_0.16.0_and_ep
hsliuustc0106 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
112 changes: 112 additions & 0 deletions
112
tests/diffusion/models/hunyuan_image_3/test_hunyuan_fused_moe.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,112 @@ | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # SPDX-FileCopyrightText: Copyright contributors to the vLLM project | ||
| """Unit tests for HunyuanFusedMoE (Support HunyuanImage3 Diffusion Model, 5a779b4).""" | ||
|
|
||
| import pytest | ||
|
|
||
| pytestmark = [pytest.mark.core_model, pytest.mark.cpu] | ||
|
|
||
|
|
||
class TestHunyuanFusedMoEPlatformDispatch:
    """Check that HunyuanFusedMoE selects its implementation through the platform hooks."""

    def test_default_platform_uses_default_impl_qualname(self, mocker):
        """Constructing HunyuanFusedMoE resolves and calls the class named by the platform."""
        import vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe as moe_module

        default_qualname = (
            "vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe.HunyuanFusedMoEDefault"
        )

        # Stand-in platform that advertises the default implementation.
        platform = mocker.MagicMock()
        platform.get_diffusion_model_impl_qualname.return_value = default_qualname
        mocker.patch.object(moe_module, "current_omni_platform", platform)

        # Intercept qualname resolution so the test only observes the dispatch calls.
        impl_cls = mocker.MagicMock()
        resolver = mocker.patch.object(moe_module, "resolve_obj_by_qualname")
        resolver.return_value = impl_cls

        moe_module.HunyuanFusedMoE(prefix="")

        platform.prepare_diffusion_op_runtime.assert_called_once_with("hunyuan_fused_moe")
        platform.get_diffusion_model_impl_qualname.assert_called_once_with("hunyuan_fused_moe")
        resolver.assert_called_once_with(default_qualname)
        impl_cls.assert_called_once_with(prefix="")
|
|
||
|
|
||
class TestHunyuanFusedMoEFactory:
    """Exercise the HunyuanFusedMoE factory: ``__new__`` and ``make_expert_params_mapping``."""

    def test_new_delegates_to_impl_class(self, mocker):
        """HunyuanFusedMoE(prefix=..., **kwargs) returns what the resolved impl class builds."""
        import vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe as moe_module

        class MockImpl:
            def __init__(self, *, prefix: str = "", **kwargs):
                self.prefix = prefix
                self.kwargs = kwargs

        platform = mocker.MagicMock()
        platform.get_diffusion_model_impl_qualname.return_value = "mock.impl.Qualname"
        mocker.patch.object(moe_module, "current_omni_platform", platform)

        # The resolved "class" is a mock that hands back a pre-built MockImpl.
        prebuilt = MockImpl(prefix="test", a=1)
        impl_cls = mocker.MagicMock(return_value=prebuilt)
        mocker.patch.object(moe_module, "resolve_obj_by_qualname", return_value=impl_cls)

        built = moe_module.HunyuanFusedMoE(prefix="test", a=1)

        assert isinstance(built, MockImpl)
        assert built.prefix == "test"
        assert built.kwargs == {"a": 1}
        platform.prepare_diffusion_op_runtime.assert_called_once_with("hunyuan_fused_moe")
        platform.get_diffusion_model_impl_qualname.assert_called_once_with("hunyuan_fused_moe")
        impl_cls.assert_called_once_with(prefix="test", a=1)

    def test_make_expert_params_mapping_delegates_to_impl(self, mocker):
        """make_expert_params_mapping forwards its arguments to the resolved impl class."""
        import vllm_omni.diffusion.models.hunyuan_image_3.hunyuan_fused_moe as moe_module

        expected_mapping = [("a", "b", 0, "c")]

        platform = mocker.MagicMock()
        platform.get_diffusion_model_impl_qualname.return_value = "mock.impl.Qualname"
        mocker.patch.object(moe_module, "current_omni_platform", platform)

        impl_cls = mocker.MagicMock()
        impl_cls.make_expert_params_mapping = mocker.MagicMock(return_value=expected_mapping)
        mocker.patch.object(moe_module, "resolve_obj_by_qualname", return_value=impl_cls)

        # Keyword arguments shared by the call under test and the delegation check.
        mapping_kwargs = {
            "ckpt_gate_proj_name": "gate",
            "ckpt_down_proj_name": "down",
            "ckpt_up_proj_name": "up",
            "num_experts": 4,
            "num_redundant_experts": 0,
        }

        result = moe_module.HunyuanFusedMoE.make_expert_params_mapping(model=None, **mapping_kwargs)

        assert result == expected_mapping
        platform.get_diffusion_model_impl_qualname.assert_called_once_with("hunyuan_fused_moe")
        # The facade passes ``model`` positionally and everything else by keyword.
        impl_cls.make_expert_params_mapping.assert_called_once_with(None, **mapping_kwargs)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,78 @@ | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # SPDX-FileCopyrightText: Copyright contributors to the vLLM project | ||
| """Unit tests for OmniDiffusionConfig.is_moe (fix is_moe type and threshold, 6663c0b).""" | ||
|
|
||
| import pytest | ||
|
|
||
| from vllm_omni.diffusion.data import OmniDiffusionConfig, TransformerConfig | ||
|
|
||
| pytestmark = [pytest.mark.core_model, pytest.mark.cpu] | ||
|
|
||
|
|
||
class TestOmniDiffusionConfigIsMoE:
    """Tests for the ``OmniDiffusionConfig.is_moe`` property.

    Covers the "fix is_moe type and threshold" change (referenced as commit 6663c0b):
    - num_experts must be an int, list or tuple; any other type yields False.
    - Threshold: is_moe is True when num_experts > 0 (not > 1).
    """

    @staticmethod
    def _is_moe(tf_params):
        """Build a config from transformer params and return its ``is_moe`` value."""
        tf_config = TransformerConfig.from_dict(tf_params)
        return OmniDiffusionConfig(model="test", tf_model_config=tf_config).is_moe

    def test_is_moe_missing_num_experts_returns_false(self):
        """An absent num_experts yields is_moe False."""
        assert self._is_moe({}) is False

    def test_is_moe_none_num_experts_returns_false(self):
        """An explicit num_experts=None yields is_moe False."""
        assert self._is_moe({"num_experts": None}) is False

    def test_is_moe_non_allowed_type_returns_false(self):
        """A num_experts that is not int/list/tuple (here a str) yields is_moe False."""
        assert self._is_moe({"num_experts": "2"}) is False

    def test_is_moe_int_zero_returns_false(self):
        """num_experts == 0 yields is_moe False (threshold is > 0)."""
        assert self._is_moe({"num_experts": 0}) is False

    def test_is_moe_int_one_returns_true(self):
        """num_experts == 1 yields is_moe True (threshold is > 0, not > 1)."""
        assert self._is_moe({"num_experts": 1}) is True

    def test_is_moe_int_gt_one_returns_true(self):
        """num_experts > 1 yields is_moe True."""
        assert self._is_moe({"num_experts": 2}) is True

    def test_is_moe_list_all_zero_returns_false(self):
        """A list whose entries are all <= 0 yields is_moe False."""
        assert self._is_moe({"num_experts": [0]}) is False

    def test_is_moe_list_has_positive_returns_true(self):
        """A list containing any int > 0 yields is_moe True."""
        assert self._is_moe({"num_experts": [0, 1]}) is True

    def test_is_moe_tuple_has_positive_returns_true(self):
        """A tuple containing any int > 0 yields is_moe True."""
        assert self._is_moe({"num_experts": (0, 2)}) is True

    def test_is_moe_list_non_int_ignored(self):
        """A list holding only non-int entries yields is_moe False."""
        assert self._is_moe({"num_experts": ["a", 0.0]}) is False
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
56 changes: 56 additions & 0 deletions
56
vllm_omni/diffusion/models/hunyuan_image_3/hunyuan_fused_moe.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,56 @@ | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # SPDX-FileCopyrightText: Copyright contributors to the vLLM project | ||
|
|
||
| from typing import Any | ||
|
|
||
| from vllm.model_executor.layers.fused_moe import SharedFusedMoE | ||
| from vllm.utils.import_utils import resolve_obj_by_qualname | ||
|
|
||
| from vllm_omni.platforms import current_omni_platform | ||
|
|
||
|
|
||
class HunyuanFusedMoEDefault(SharedFusedMoE):
    """SharedFusedMoE subclass that post-processes its weights on the first forward call.

    A forward pre-hook is registered at construction time. When it fires it calls
    ``quant_method.process_weights_after_loading`` (if a quant method is set) and
    then removes itself.
    """

    def __init__(self, *, prefix: str = "", **kwargs: Any) -> None:
        """Build the layer, remember ``prefix`` and register the one-shot pre-hook.

        Args:
            prefix: Forwarded to ``SharedFusedMoE`` and stored on ``self._prefix``.
            **kwargs: Forwarded unchanged to ``SharedFusedMoE``.
        """
        super().__init__(prefix=prefix, **kwargs)
        self._prefix = prefix
        # Keep the handle so the hook can unregister itself after it has run.
        self._init_hook_handle = self.register_forward_pre_hook(
            self._initialize_kernel_hook,
            with_kwargs=True,
        )

    def _initialize_kernel_hook(self, module: Any, args: Any, kwargs: Any) -> None:
        """Forward pre-hook: finalize weights via the quant method, then unregister."""
        quant_method = self.quant_method
        if quant_method:
            quant_method.process_weights_after_loading(self)
        self._init_hook_handle.remove()

    def forward(self, hidden_states: Any, router_logits: Any) -> Any:
        """Delegate to ``SharedFusedMoE.forward`` with the same two arguments."""
        output = super().forward(hidden_states, router_logits)
        return output
|
|
||
|
|
||
class HunyuanFusedMoE:
    """Factory facade that dispatches to the platform-selected fused-MoE implementation.

    Instantiating this class does not produce a ``HunyuanFusedMoE`` object: ``__new__``
    returns an instance of whatever class the current platform names for the
    ``"hunyuan_fused_moe"`` op.
    """

    # Operation key handed to the platform hooks.
    _OP_NAME = "hunyuan_fused_moe"

    @classmethod
    def _impl_class(cls) -> Any:
        """Resolve the implementation class named by the current platform."""
        qualname = current_omni_platform.get_diffusion_model_impl_qualname(cls._OP_NAME)
        return resolve_obj_by_qualname(qualname)

    def __new__(cls, *, prefix: str = "", **kwargs: Any) -> Any:
        """Prepare the op runtime, then build and return the platform implementation.

        Args:
            prefix: Forwarded to the implementation constructor.
            **kwargs: Forwarded unchanged to the implementation constructor.

        Returns:
            The object produced by calling the resolved implementation class.
        """
        # Runtime preparation happens before the implementation is resolved.
        current_omni_platform.prepare_diffusion_op_runtime(cls._OP_NAME)
        impl_cls = cls._impl_class()
        return impl_cls(prefix=prefix, **kwargs)

    @classmethod
    def make_expert_params_mapping(
        cls,
        model: Any,
        ckpt_gate_proj_name: str,
        ckpt_down_proj_name: str,
        ckpt_up_proj_name: str,
        num_experts: int,
        num_redundant_experts: int = 0,
    ) -> list[tuple[str, str, int, str]]:
        """Delegate to ``make_expert_params_mapping`` of the platform implementation.

        ``model`` is passed positionally; all other arguments are passed by keyword.
        The implementation's return value is returned unchanged.
        """
        forwarded = {
            "ckpt_gate_proj_name": ckpt_gate_proj_name,
            "ckpt_down_proj_name": ckpt_down_proj_name,
            "ckpt_up_proj_name": ckpt_up_proj_name,
            "num_experts": num_experts,
            "num_redundant_experts": num_redundant_experts,
        }
        return cls._impl_class().make_expert_params_mapping(model, **forwarded)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # SPDX-FileCopyrightText: Copyright contributors to the vLLM project |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.