vllm-project · princepride · Apr 3, 2026 · Apr 2, 2026 · Apr 2, 2026 · Apr 2, 2026
@@ -2,6 +2,7 @@
 import os
 
 from vllm_omni.inputs.data import OmniPromptType
+from vllm_omni.model_executor.stage_input_processors.bagel import GEN_THINK_SYSTEM_PROMPT
 
 
 def parse_args():
@@ -65,6 +66,17 @@ def parse_args():
         help="CFG parallel size: 1=batched (single GPU), 2=parallel with 2 branches (text CFG only), 3=parallel (3 GPUs).",
     )
     parser.add_argument("--seed", type=int, default=None, help="Random seed for generation.")
+    parser.add_argument(
+        "--cfg-interval",
+        type=float,
+        nargs=2,
+        default=None,
+        help="CFG interval [start, end] (default: pipeline default)",
+    )
+    parser.add_argument(
+        "--cfg-renorm-type", type=str, default=None, help="CFG renorm type: global, text_channel, channel"
+    )
+    parser.add_argument("--cfg-renorm-min", type=float, default=None, help="CFG renorm min")
     parser.add_argument(
         "--enable-diffusion-pipeline-profiler",
         action="store_true",
@@ -76,6 +88,12 @@ def parse_args():
         default=None,
         help="Quantization method (e.g. 'fp8').",
     )
+    parser.add_argument(
+        "--think",
+        action="store_true",
+        default=False,
+        help="Enable thinking mode: AR stage decodes <think>...</think> planning tokens before image generation.",
+    )
 
     args = parser.parse_args()
     return args
@@ -110,8 +128,12 @@ def main():
     from vllm_omni.entrypoints.omni import Omni
 
     omni_kwargs = {}
-    if args.stage_configs_path:
-        omni_kwargs["stage_configs_path"] = args.stage_configs_path
+    stage_configs_path = args.stage_configs_path
+    if args.think and stage_configs_path is None:
+        stage_configs_path = "vllm_omni/model_executor/stage_configs/bagel_think.yaml"
+        print(f"[Info] Think mode enabled, using stage config: {stage_configs_path}")
+    if stage_configs_path:
+        omni_kwargs["stage_configs_path"] = stage_configs_path
 
     omni_kwargs.update(
         {
@@ -136,7 +158,8 @@ def main():
             if not args.image_path or not os.path.exists(args.image_path):
                 raise ValueError(f"img2img requires --image-path pointing to an existing file, got: {args.image_path}")
             loaded_image = Image.open(args.image_path).convert("RGB")
-            final_prompt_text = f"<|fim_middle|><|im_start|>{p}<|im_end|>"
+            think_prefix = f"<|im_start|>{GEN_THINK_SYSTEM_PROMPT}<|im_end|>" if args.think else ""
+            final_prompt_text = f"{think_prefix}<|fim_middle|><|im_start|>{p}<|im_end|>"
             prompt_dict = {
                 "prompt": final_prompt_text,
                 "multi_modal_data": {"img2img": loaded_image},
@@ -160,7 +183,8 @@ def main():
             prompt_dict = {"prompt": final_prompt_text, "modalities": ["text"]}
             formatted_prompts.append(prompt_dict)
         else:
-            final_prompt_text = f"<|im_start|>{p}<|im_end|>"
+            think_prefix = f"<|im_start|>{GEN_THINK_SYSTEM_PROMPT}<|im_end|>" if args.think else ""
+            final_prompt_text = f"{think_prefix}<|im_start|>{p}<|im_end|>"
             prompt_dict = {"prompt": final_prompt_text, "modalities": ["image"]}
             if args.negative_prompt is not None:
                 prompt_dict["negative_prompt"] = args.negative_prompt
@@ -178,6 +202,12 @@ def main():
                 "cfg_text_scale": args.cfg_text_scale,
                 "cfg_img_scale": args.cfg_img_scale,
             }
+            if args.cfg_interval is not None:
+                extra["cfg_interval"] = tuple(args.cfg_interval)
+            if args.cfg_renorm_type is not None:
+                extra["cfg_renorm_type"] = args.cfg_renorm_type
+            if args.cfg_renorm_min is not None:
+                extra["cfg_renorm_min"] = args.cfg_renorm_min
             if args.negative_prompt is not None:
                 extra["negative_prompt"] = args.negative_prompt
             diffusion_params.extra_args = extra  # type: ignore
@@ -186,6 +216,17 @@ def main():
 
     img_idx = 0
     for req_output in omni_outputs:
+        if args.think:
+            text_output = getattr(req_output, "text", None) or getattr(req_output, "outputs", None)
+            if text_output:
+                if isinstance(text_output, list) and text_output:
+                    for out in text_output:
+                        txt = getattr(out, "text", str(out))
+                        if txt:
+                            print(f"[Think] {txt}")
+                elif isinstance(text_output, str):
+                    print(f"[Think] {text_output}")
+
         images = getattr(req_output, "images", None)
 
         if not images:
@@ -194,6 +235,7 @@ def main():
         for j, img in enumerate(images):
             save_path = os.path.join(args.output, f"output_{img_idx}_{j}.png")
             img.save(save_path)
+            print(f"[Output] Saved image to {save_path}")
         img_idx += 1
 
     print(omni_outputs)

@@ -326,11 +326,18 @@ def forward(self, req: OmniDiffusionRequest) -> DiffusionOutput:
         cfg_text_scale = extra_args.get("cfg_text_scale", 4.0)
         cfg_img_scale = extra_args.get("cfg_img_scale", 1.5)
 
+        cfg_interval = extra_args.get("cfg_interval", (0.4, 1.0))
+        cfg_renorm_type = extra_args.get("cfg_renorm_type", "global")
+        cfg_renorm_min = extra_args.get("cfg_renorm_min", 0.0)
+
         gen_params = BagelGenParams(
             num_timesteps=int(req.sampling_params.num_inference_steps or 50),
             timestep_shift=3.0,
             cfg_text_scale=cfg_text_scale,
             cfg_img_scale=cfg_img_scale,
+            cfg_interval=cfg_interval,
+            cfg_renorm_type=cfg_renorm_type,
+            cfg_renorm_min=cfg_renorm_min,
         )
 
         gen_context = {

@@ -722,14 +722,15 @@ def _enqueue_cfg_companions(
             cid = f"{parent_id}{ep.request_id_suffix}"
             companion_prompt = ep.prompt
 
-            # Run through same input processing as the main prompt
+            companion_params, companion_spl = ep.apply_overrides(stage0_params, sampling_params_list)
+
             if isinstance(companion_prompt, dict):
                 _inject_global_id(companion_prompt, cid)
 
             request = self.input_processor.process_inputs(
                 request_id=cid,
                 prompt=companion_prompt,
-                params=stage0_params,
+                params=companion_params,
                 supported_tasks=self.supported_tasks,
             )
             request = _upgrade_to_omni_request(request, companion_prompt)
@@ -750,7 +751,7 @@ def _enqueue_cfg_companions(
                     "parent_id": parent_id,
                     "role": ep.role,
                     "prompt": request,
-                    "sampling_params_list": sampling_params_list,
+                    "sampling_params_list": companion_spl,
                 }
             )