diff --git a/vllm_omni/entrypoints/openai/api_server.py b/vllm_omni/entrypoints/openai/api_server.py index 09e4b6b6688..e58b490a772 100644 --- a/vllm_omni/entrypoints/openai/api_server.py +++ b/vllm_omni/entrypoints/openai/api_server.py @@ -1696,10 +1696,12 @@ async def edit_images( # vllm-omni extension for layered models (e.g., Qwen-Image-Layered) layers: int | None = Form(None), resolution: int | None = Form(None), # See SUPPORTED_LAYERED_RESOLUTIONS + bot_task: str | None = Form(None), ) -> ImageGenerationResponse: """ OpenAI-compatible image edit endpoint. """ + # 1. get engine and model engine_client, model_name, stage_configs = _get_engine_and_model(raw_request) if model is not None and model != model_name: @@ -1899,6 +1901,8 @@ async def edit_images( lora_dict = _get_lora_from_json_str(lora) _parse_lora_request(lora_dict) extra_body["lora"] = lora_dict + if bot_task is not None: + extra_body["bot_task"] = bot_task prompt_text = prompt.get("prompt", "") generation_result = await chat_handler.generate_diffusion_images( @@ -2218,6 +2222,7 @@ async def _load_input_images( images.append(img) except Exception as e: raise ValueError(f"Failed to open uploaded file: {e}") + else: raise ValueError(f"Unsupported input: {inp}") diff --git a/vllm_omni/entrypoints/openai/serving_chat.py b/vllm_omni/entrypoints/openai/serving_chat.py index 32f41a64395..48319c91336 100644 --- a/vllm_omni/entrypoints/openai/serving_chat.py +++ b/vllm_omni/entrypoints/openai/serving_chat.py @@ -2148,6 +2148,7 @@ def _build_multistage_generation_inputs( lora_body = extra_body.get("lora") layers = extra_body.get("layers") resolution = extra_body.get("resolution") + bot_task = extra_body.get("bot_task") engine_prompt_data: dict[str, Any] | None = None modalities = ["image"] @@ -2158,6 +2159,14 @@ def _build_multistage_generation_inputs( else: engine_prompt_data = {"image": reference_images} + if bot_task: + from vllm_omni.diffusion.models.hunyuan_image3.prompt_utils import build_prompt + + prompt = build_prompt(prompt, task=bot_task) + if reference_images and len(reference_images) == 1: + engine_prompt_data = {"image": reference_images[0]} + modalities = ["image"] + engine_prompt: OmniTextPrompt = {"prompt": prompt} engine_prompt["modalities"] = modalities if negative_prompt is not None: