vllm-project · david6666666 · Jun 6, 2026
@@ -0,0 +1,30 @@
+# Stage config for running FLUX.2-klein DiT with ModelOpt FP8 auto-detect.
+# The following config is for 2 GPUs.
+
+stage_args:  # list of stage definitions; this file declares a single stage
+  - stage_id: 0
+    stage_type: diffusion
+    runtime:
+      devices: "0,1"  # two device ids, in line with the 2-GPU note in the header
+      max_batch_size: 1
+    engine_args:  # no quantization key here; the header attributes FP8 to ModelOpt auto-detect
+      model_stage: dit
+      model_class_name: Flux2KleinPipeline
+      max_num_seqs: 1
+      enforce_eager: true
+      trust_remote_code: true  # NOTE(review): presumably lets model-repo code run; confirm it is needed
+      distributed_executor_backend: "mp"  # presumably the multiprocessing executor; TODO confirm
+      parallel_config:
+        tensor_parallel_size: 2  # equals the number of ids listed in runtime.devices
+
+    final_output: true
+    final_output_type: image
+    is_comprehension: false
+    default_sampling_params:
+      seed: 42  # fixed default seed
+
+runtime:  # top-level runtime settings, distinct from the per-stage runtime key
+  enabled: true
+  defaults:
+    window_size: -1  # NOTE(review): -1 presumably means "no window limit"; confirm
+    max_inflight: 1  # presumably caps in-flight work at one; TODO confirm
@@ -0,0 +1,30 @@
+# Stage config for running FLUX.1 DiT with ModelOpt FP8 auto-detect.
+# The following config is for 2 GPUs.
+
+stage_args:  # list of stage definitions; this file declares a single stage
+  - stage_id: 0
+    stage_type: diffusion
+    runtime:
+      devices: "0,1"  # two device ids, in line with the 2-GPU note in the header
+      max_batch_size: 1
+    engine_args:  # no quantization key here; the header attributes FP8 to ModelOpt auto-detect
+      model_stage: dit
+      model_class_name: FluxPipeline
+      max_num_seqs: 1
+      enforce_eager: true
+      trust_remote_code: true  # NOTE(review): presumably lets model-repo code run; confirm it is needed
+      distributed_executor_backend: "mp"  # presumably the multiprocessing executor; TODO confirm
+      parallel_config:
+        tensor_parallel_size: 2  # equals the number of ids listed in runtime.devices
+
+    final_output: true
+    final_output_type: image
+    is_comprehension: false
+    default_sampling_params:
+      seed: 42  # fixed default seed
+
+runtime:  # top-level runtime settings, distinct from the per-stage runtime key
+  enabled: true
+  defaults:
+    window_size: -1  # NOTE(review): -1 presumably means "no window limit"; confirm
+    max_inflight: 1  # presumably caps in-flight work at one; TODO confirm
@@ -0,0 +1,30 @@
+# Stage config for running Qwen-Image DiT with ModelOpt FP8 auto-detect.
+# The following config is for 2 GPUs.
+
+stage_args:  # list of stage definitions; this file declares a single stage
+  - stage_id: 0
+    stage_type: diffusion
+    runtime:
+      devices: "0,1"  # two device ids, in line with the 2-GPU note in the header
+      max_batch_size: 1
+    engine_args:  # no quantization key here; the header attributes FP8 to ModelOpt auto-detect
+      model_stage: dit
+      model_class_name: QwenImagePipeline
+      max_num_seqs: 1
+      enforce_eager: true
+      trust_remote_code: true  # NOTE(review): presumably lets model-repo code run; confirm it is needed
+      distributed_executor_backend: "mp"  # presumably the multiprocessing executor; TODO confirm
+      parallel_config:
+        tensor_parallel_size: 2  # equals the number of ids listed in runtime.devices
+
+    final_output: true
+    final_output_type: image
+    is_comprehension: false
+    default_sampling_params:
+      seed: 42  # fixed default seed
+
+runtime:  # top-level runtime settings, distinct from the per-stage runtime key
+  enabled: true
+  defaults:
+    window_size: -1  # NOTE(review): -1 presumably means "no window limit"; confirm
+    max_inflight: 1  # presumably caps in-flight work at one; TODO confirm
@@ -0,0 +1,30 @@
+# Stage config for running Z-Image DiT with ModelOpt FP8 auto-detect.
+# The following config is for 2 GPUs.
+
+stage_args:  # list of stage definitions; this file declares a single stage
+  - stage_id: 0
+    stage_type: diffusion
+    runtime:
+      devices: "0,1"  # two device ids, in line with the 2-GPU note in the header
+      max_batch_size: 1
+    engine_args:  # no quantization key here; the header attributes FP8 to ModelOpt auto-detect
+      model_stage: dit
+      model_class_name: ZImagePipeline
+      max_num_seqs: 1
+      enforce_eager: true
+      trust_remote_code: true  # NOTE(review): presumably lets model-repo code run; confirm it is needed
+      distributed_executor_backend: "mp"  # presumably the multiprocessing executor; TODO confirm
+      parallel_config:
+        tensor_parallel_size: 2  # equals the number of ids listed in runtime.devices
+
+    final_output: true
+    final_output_type: image
+    is_comprehension: false
+    default_sampling_params:
+      seed: 42  # fixed default seed
+
+runtime:  # top-level runtime settings, distinct from the per-stage runtime key
+  enabled: true
+  defaults:
+    window_size: -1  # NOTE(review): -1 presumably means "no window limit"; confirm
+    max_inflight: 1  # presumably caps in-flight work at one; TODO confirm