diff --git a/examples/offline_inference/bagel/end2end.py b/examples/offline_inference/bagel/end2end.py index 6562f32ae6a..922a1af2368 100644 --- a/examples/offline_inference/bagel/end2end.py +++ b/examples/offline_inference/bagel/end2end.py @@ -101,7 +101,7 @@ def main(): if not prompts: # Default prompt for text2img test if none provided - prompts = ["<|im_start|>A cute cat<|im_end|>"] + prompts = ["A cute cat"] print(f"[Info] No prompts provided, using default: {prompts}") omni_outputs = [] diff --git a/examples/online_serving/bagel/openai_chat_client.py b/examples/online_serving/bagel/openai_chat_client.py index fd5f4cac5d7..cc9ec32db91 100755 --- a/examples/online_serving/bagel/openai_chat_client.py +++ b/examples/online_serving/bagel/openai_chat_client.py @@ -125,7 +125,7 @@ def generate_image( def main(): parser = argparse.ArgumentParser(description="Bagel multimodal chat client") - parser.add_argument("--prompt", "-p", default="<|im_start|>A cute cat<|im_end|>", help="Text prompt") + parser.add_argument("--prompt", "-p", default="A cute cat", help="Text prompt") parser.add_argument("--output", "-o", default="bagel_output.png", help="Output file (for image results)") parser.add_argument("--server", "-s", default="http://localhost:8091", help="Server URL") diff --git a/vllm_omni/model_executor/stage_configs/bagel_single_stage.yaml b/vllm_omni/model_executor/stage_configs/bagel_single_stage.yaml new file mode 100644 index 00000000000..2c1d84af493 --- /dev/null +++ b/vllm_omni/model_executor/stage_configs/bagel_single_stage.yaml @@ -0,0 +1,32 @@ +# Stage 0: DiT (diffusion-based image generation) + +stage_args: + + - stage_id: 0 + stage_type: diffusion + runtime: + devices: "0" + engine_args: + model_stage: dit + max_num_seqs: 1 + gpu_memory_utilization: 0.45 + enforce_eager: true + trust_remote_code: true + engine_output_type: image + distributed_executor_backend: "mp" + enable_prefix_caching: false + max_num_batched_tokens: 32768 + 
tensor_parallel_size: 1 + + final_output: true + final_output_type: image + is_comprehension: false + default_sampling_params: + seed: 52 + +# Runtime defaults +runtime: + enabled: true + defaults: + window_size: -1 + max_inflight: 1