Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions examples/offline_inference/text_to_image/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ This folder provides several entrypoints for experimenting with text-to-image di
| `black-forest-labs/FLUX.2-klein-4B` | 1024 x 1024 | 72.7 | 14.9 |
| `black-forest-labs/FLUX.2-klein-9B` | 1024 x 1024 | 37.1 | 32.3 |
| `black-forest-labs/FLUX.2-dev` | 1024 x 1024 | 65.7 | >80 (CPU offload required) |
| `HunyuanImage-3.0` | 1024 x 1024 | 80.0 (TP≥3) | 160 |

!!! info
*Peak VRAM: based on basic single-card usage, batch size =1, without any acceleration/optimization features. FLUX.2-dev requires `--enable-cpu-offload` on a single 80 GiB GPU.
Expand Down Expand Up @@ -90,6 +91,8 @@ python text_to_image.py \
| `--enable-cpu-offload` | flag | off | Enable CPU offloading for diffusion models |
| `--lora-path` | str | — | Path to PEFT LoRA adapter folder |
| `--lora-scale` | float | `1.0` | Scale factor for LoRA weights |
| `--use-system-prompt` | str | `None` | System prompt preset: `en_unified`, `en_vanilla`, `en_recaption`, `en_think_recaption`, `dynamic`, `None`, or `custom` (supply the text via `--system-prompt`). Recommended: `en_unified`. Only for HunyuanImage-3.0. |
| `--system-prompt` | str | `None` | Custom system prompt text. Only used when `--use-system-prompt` is set to `custom`. Only for HunyuanImage-3.0. |

**NextStep-1.1 specific arguments:**

Expand Down
17 changes: 15 additions & 2 deletions examples/offline_inference/text_to_image/text_to_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,19 @@ def parse_args() -> argparse.Namespace:
action="store_true",
help="Enable logging of diffusion pipeline stats.",
)
parser.add_argument(
"--use-system-prompt",
type=str,
default=None,
choices=["None", "dynamic", "en_vanilla", "en_recaption", "en_think_recaption", "en_unified", "custom"],
help="System prompt preset for generation. Recommended: en_unified.",
)
parser.add_argument(
"--system-prompt",
type=str,
default=None,
help=("Custom system prompt. Used when --use-system-prompt is custom. "),
)
return parser.parse_args()


Expand Down Expand Up @@ -382,13 +395,13 @@ def main():
)

generation_start = time.perf_counter()

extra_args = {
"timesteps_shift": args.timesteps_shift,
"cfg_schedule": args.cfg_schedule,
"use_norm": args.use_norm,
"use_system_prompt": args.use_system_prompt,
"system_prompt": args.system_prompt,
}

if lora_request:
extra_args["lora_request"] = lora_request
extra_args["lora_scale"] = args.lora_scale
Expand Down
2 changes: 2 additions & 0 deletions examples/online_serving/text_to_image/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,8 @@ count, use `size` and `n` rather than `height`, `width`, or
| `seed` | int | None | Random seed (reproducible) |
| `negative_prompt` | str | None | Negative prompt |
| `num_outputs_per_prompt` | int | 1 | Number of images to generate |
| `use_system_prompt` | str | None | System prompt preset: `en_unified`, `en_vanilla`, `en_recaption`, `en_think_recaption`, `dynamic`, `None`, or custom text string. Only for HunyuanImage-3.0. |
| `system_prompt` | str | None | Custom system prompt text. Only used when `use_system_prompt` is set to `custom`. Only for HunyuanImage-3.0. |

## Response Format

Expand Down
27 changes: 24 additions & 3 deletions examples/online_serving/text_to_image/openai_chat_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ def generate_image(
lora_name: str | None = None,
lora_scale: float | None = None,
lora_int_id: int | None = None,
use_system_prompt: str | None = None,
system_prompt: str | None = None,
) -> bytes | None:
"""Generate an image using the images generation API.

Expand All @@ -45,6 +47,8 @@ def generate_image(
lora_name: LoRA name (optional, defaults to path stem)
lora_scale: LoRA scale factor (default: 1.0)
lora_int_id: LoRA integer ID (optional, derived from path if not provided)
use_system_prompt: System prompt for generation.
system_prompt: Custom system prompt.

Returns:
Image bytes or None if failed
Expand All @@ -70,7 +74,10 @@ def generate_image(
payload["negative_prompt"] = negative_prompt
if seed is not None:
payload["seed"] = seed

if use_system_prompt is not None:
payload["use_system_prompt"] = use_system_prompt
if system_prompt is not None:
payload["system_prompt"] = system_prompt
# Add LoRA if provided
if lora_path:
lora_body: dict = {
Expand Down Expand Up @@ -128,9 +135,21 @@ def main():
default=None,
help="LoRA integer id (cache key). If omitted, the server derives a stable id from lora_path.",
)

parser.add_argument(
"--use-system-prompt",
type=str,
default=None,
help=(
"System prompt for generation. Use predefined types: 'en_unified', 'en_vanilla', 'en_recaption', 'en_think_recaption', 'dynamic', or 'None'; Or provide custom text string directly. Recommended en_unified. "
),
)
parser.add_argument(
"--system-prompt",
type=str,
default=None,
help=("Custom system prompt. Used when --use-system-prompt is custom. "),
)
args = parser.parse_args()

print(f"Generating image for: {args.prompt}")

image_bytes = generate_image(
Expand All @@ -146,6 +165,8 @@ def main():
lora_name=args.lora_name,
lora_scale=args.lora_scale if args.lora_path else None,
lora_int_id=args.lora_int_id if args.lora_path else None,
use_system_prompt=args.use_system_prompt,
system_prompt=args.system_prompt,
)

if image_bytes:
Expand Down
Loading
Loading