Skip to content
16 changes: 15 additions & 1 deletion examples/offline_inference/text_to_image/text_to_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,17 @@ def parse_args() -> argparse.Namespace:
"--model",
default="Qwen/Qwen-Image",
help="Diffusion model name or local path. Supported models: "
"Qwen/Qwen-Image, Tongyi-MAI/Z-Image-Turbo, Qwen/Qwen-Image-2512, stepfun-ai/NextStep-1.1",
"Qwen/Qwen-Image, Tongyi-MAI/Z-Image-Turbo, Qwen/Qwen-Image-2512, stepfun-ai/NextStep-1.1, "
"black-forest-labs/FLUX.1-dev, black-forest-labs/FLUX.2-klein-9B, "
"black-forest-labs/FLUX.2-dev, tencent/HunyuanImage-3.0-Instruct, "
"meituan-longcat/LongCat-Image, OvisAI/Ovis-Image, "
"stabilityai/stable-diffusion-3.5-medium, Tongyi-MAI/Z-Image-Turbo and etc.",
)
parser.add_argument(
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ZJY0516 @SamitHuang PTAL: do we have a stage config YAML for diffusion models?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't usually have one; it is only needed by Hunyuan.
I also proposed a change to introduce stage selection in #1826;
if that is merged, we won't really need the user to specify stage_configs_yaml here.

"--stage-configs-path",
type=str,
default=None,
help="Path to a YAML file containing stage configurations for Omni.",
)
parser.add_argument("--prompt", default="a cup of coffee on the table", help="Text prompt for image generation.")
parser.add_argument(
Expand Down Expand Up @@ -311,6 +321,8 @@ def main():
**lora_args,
**quant_kwargs,
}
if args.stage_configs_path:
omni_kwargs["stage_configs_path"] = args.stage_configs_path
if use_nextstep:
# NextStep-1.1 requires explicit pipeline class
omni_kwargs["model_class_name"] = "NextStep11Pipeline"
Expand Down Expand Up @@ -338,6 +350,8 @@ def main():
print(f" Image size: {args.width}x{args.height}")
if args.lora_path:
print(f" LoRA: scale={args.lora_scale}")
if args.stage_configs_path:
print(f" stage-configs-path: {args.stage_configs_path}")
print(f"{'=' * 60}\n")

# Build LoRA request when --lora-path is set
Expand Down
7 changes: 7 additions & 0 deletions vllm_omni/diffusion/model_loader/diffusers_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,8 +328,15 @@ def load_weights(self, model: nn.Module) -> None:
# that have loaded weights tracking currently.
if loaded_weights is not None:
weights_not_loaded = weights_to_load - loaded_weights
# NOTE: if the model is quantized, ignore not_loaded check for scale weights
weights_scale_not_loaded = {name for name in weights_not_loaded if name.endswith("weight_scale")}
weights_not_loaded = weights_not_loaded - weights_scale_not_loaded
if weights_not_loaded:
raise ValueError(f"Following weights were not initialized from checkpoint: {weights_not_loaded}")
if weights_scale_not_loaded:
logger.warning(
f"Following weight_scale weights were not initialized from checkpoint: {weights_scale_not_loaded}"
)

def _is_gguf_quantization(self, od_config: OmniDiffusionConfig) -> bool:
quant_config = od_config.quantization_config
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1565,7 +1565,9 @@ def forward(


class HunyuanImage3DecoderLayer(nn.Module):
def __init__(self, config: HunyuanImage3Config, layer_idx: int, prefix: str = ""):
def __init__(
self, config: HunyuanImage3Config, quant_config: QuantizationConfig | None, layer_idx: int, prefix: str = ""
):
super().__init__()
self.hidden_size = config.hidden_size
self.layer_idx = layer_idx
Expand Down Expand Up @@ -1607,9 +1609,13 @@ def __init__(self, config: HunyuanImage3Config, layer_idx: int, prefix: str = ""
(isinstance(config.num_experts, int) and config.num_experts > 1)
or (isinstance(config.num_experts, list) and max(config.num_experts) > 1)
) and layer_idx >= config.moe_layer_num_skipped:
self.mlp = HunYuanSparseMoeBlock(config, layer_id=layer_idx, prefix=f"{prefix}.mlp")
self.mlp = HunYuanSparseMoeBlock(
config, quant_config=quant_config, layer_id=layer_idx, prefix=f"{prefix}.mlp"
)
else:
self.mlp = HunYuanMLP(self.hidden_size, self.intermediate_size, config.hidden_act)
self.mlp = HunYuanMLP(
self.hidden_size, self.intermediate_size, config.hidden_act, quant_config=quant_config
)

self.input_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
self.post_attention_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
Expand Down Expand Up @@ -1704,14 +1710,15 @@ def _init_weights(self, module):


class HunyuanImage3Model(nn.Module):
def __init__(self, config: HunyuanImage3Config, prefix: str = ""):
def __init__(self, config: HunyuanImage3Config, quant_config: QuantizationConfig | None, prefix: str = ""):
super().__init__()
quant_config = None
lora_config = None
self.num_redundant_experts = 0
self.config = config
self.device = get_local_device()

self.quant_config = quant_config
logger.debug(f"quant_config: {quant_config}")
self.padding_idx = config.pad_token_id
lora_vocab = (lora_config.lora_extra_vocab_size * (lora_config.max_loras or 1)) if lora_config else 0
self.vocab_size = config.vocab_size + lora_vocab
Expand All @@ -1730,6 +1737,7 @@ def __init__(self, config: HunyuanImage3Config, prefix: str = ""):
config.num_hidden_layers,
lambda prefix: HunyuanImage3DecoderLayer(
config=config,
quant_config=quant_config,
layer_idx=int(prefix.split(".")[-1]),
prefix=prefix,
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

from vllm_omni.diffusion.data import DiffusionOutput, OmniDiffusionConfig
from vllm_omni.diffusion.model_loader.diffusers_loader import DiffusersPipelineLoader
from vllm_omni.diffusion.quantization import get_vllm_quant_config_for_layers
from vllm_omni.diffusion.request import OmniDiffusionRequest

from .autoencoder import AutoencoderKLConv3D
Expand Down Expand Up @@ -77,7 +78,8 @@ def __init__(self, od_config: OmniDiffusionConfig) -> None:
fall_back_to_pt=True,
)
]
self.model = HunyuanImage3Model(self.hf_config)
quant_config = get_vllm_quant_config_for_layers(od_config.quantization_config)
self.model = HunyuanImage3Model(self.hf_config, quant_config=quant_config)
self.vae = AutoencoderKLConv3D.from_config(self.hf_config.vae)
self._pipeline = None
self._tkwrapper = TokenizerWrapper(od_config.model)
Expand Down
Loading