diff --git a/vllm_omni/model_executor/stage_configs/flux2_klein_dit_2gpu_fp8.yaml b/vllm_omni/model_executor/stage_configs/flux2_klein_dit_2gpu_fp8.yaml
new file mode 100644
index 00000000000..0b4ebe8efd4
--- /dev/null
+++ b/vllm_omni/model_executor/stage_configs/flux2_klein_dit_2gpu_fp8.yaml
@@ -0,0 +1,30 @@
+# Stage config for running FLUX.2-klein DiT with ModelOpt FP8 auto-detect.
+# The following config is for 2 GPUs: two device ids and tensor_parallel_size: 2.
+
+stage_args:
+  - stage_id: 0  # the only stage defined in this file
+    stage_type: diffusion
+    runtime:
+      devices: "0,1"  # two device ids, same count as tensor_parallel_size below
+      max_batch_size: 1
+    engine_args:  # NOTE(review): no quantization key here; FP8 presumably detected from the checkpoint - confirm
+      model_stage: dit
+      model_class_name: Flux2KleinPipeline
+      max_num_seqs: 1
+      enforce_eager: true
+      trust_remote_code: true
+      distributed_executor_backend: "mp"
+      parallel_config:
+        tensor_parallel_size: 2  # same count as the ids listed in runtime.devices
+
+    final_output: true
+    final_output_type: image
+    is_comprehension: false
+    default_sampling_params:
+      seed: 42  # fixed default seed
+
+runtime:
+  enabled: true
+  defaults:
+    window_size: -1  # NOTE(review): -1 looks like a "no limit" sentinel - confirm
+    max_inflight: 1
diff --git a/vllm_omni/model_executor/stage_configs/flux_dit_2gpu_fp8.yaml b/vllm_omni/model_executor/stage_configs/flux_dit_2gpu_fp8.yaml
new file mode 100644
index 00000000000..45e4ebeff3d
--- /dev/null
+++ b/vllm_omni/model_executor/stage_configs/flux_dit_2gpu_fp8.yaml
@@ -0,0 +1,30 @@
+# Stage config for running FLUX.1 DiT with ModelOpt FP8 auto-detect.
+# The following config is for 2 GPUs: two device ids and tensor_parallel_size: 2.
+
+stage_args:
+  - stage_id: 0  # the only stage defined in this file
+    stage_type: diffusion
+    runtime:
+      devices: "0,1"  # two device ids, same count as tensor_parallel_size below
+      max_batch_size: 1
+    engine_args:  # NOTE(review): no quantization key here; FP8 presumably detected from the checkpoint - confirm
+      model_stage: dit
+      model_class_name: FluxPipeline
+      max_num_seqs: 1
+      enforce_eager: true
+      trust_remote_code: true
+      distributed_executor_backend: "mp"
+      parallel_config:
+        tensor_parallel_size: 2  # same count as the ids listed in runtime.devices
+
+    final_output: true
+    final_output_type: image
+    is_comprehension: false
+    default_sampling_params:
+      seed: 42  # fixed default seed
+
+runtime:
+  enabled: true
+  defaults:
+    window_size: -1  # NOTE(review): -1 looks like a "no limit" sentinel - confirm
+    max_inflight: 1
diff --git a/vllm_omni/model_executor/stage_configs/qwen_image_dit_2gpu_fp8.yaml b/vllm_omni/model_executor/stage_configs/qwen_image_dit_2gpu_fp8.yaml
new file mode 100644
index 00000000000..1f0b60a7724
--- /dev/null
+++ b/vllm_omni/model_executor/stage_configs/qwen_image_dit_2gpu_fp8.yaml
@@ -0,0 +1,30 @@
+# Stage config for running Qwen-Image DiT with ModelOpt FP8 auto-detect.
+# The following config is for 2 GPUs: two device ids and tensor_parallel_size: 2.
+
+stage_args:
+  - stage_id: 0  # the only stage defined in this file
+    stage_type: diffusion
+    runtime:
+      devices: "0,1"  # two device ids, same count as tensor_parallel_size below
+      max_batch_size: 1
+    engine_args:  # NOTE(review): no quantization key here; FP8 presumably detected from the checkpoint - confirm
+      model_stage: dit
+      model_class_name: QwenImagePipeline
+      max_num_seqs: 1
+      enforce_eager: true
+      trust_remote_code: true
+      distributed_executor_backend: "mp"
+      parallel_config:
+        tensor_parallel_size: 2  # same count as the ids listed in runtime.devices
+
+    final_output: true
+    final_output_type: image
+    is_comprehension: false
+    default_sampling_params:
+      seed: 42  # fixed default seed
+
+runtime:
+  enabled: true
+  defaults:
+    window_size: -1  # NOTE(review): -1 looks like a "no limit" sentinel - confirm
+    max_inflight: 1
diff --git a/vllm_omni/model_executor/stage_configs/z_image_dit_2gpu_fp8.yaml b/vllm_omni/model_executor/stage_configs/z_image_dit_2gpu_fp8.yaml
new file mode 100644
index 00000000000..7d94a18cb26
--- /dev/null
+++ b/vllm_omni/model_executor/stage_configs/z_image_dit_2gpu_fp8.yaml
@@ -0,0 +1,30 @@
+# Stage config for running Z-Image DiT with ModelOpt FP8 auto-detect.
+# The following config is for 2 GPUs: two device ids and tensor_parallel_size: 2.
+
+stage_args:
+  - stage_id: 0  # the only stage defined in this file
+    stage_type: diffusion
+    runtime:
+      devices: "0,1"  # two device ids, same count as tensor_parallel_size below
+      max_batch_size: 1
+    engine_args:  # NOTE(review): no quantization key here; FP8 presumably detected from the checkpoint - confirm
+      model_stage: dit
+      model_class_name: ZImagePipeline
+      max_num_seqs: 1
+      enforce_eager: true
+      trust_remote_code: true
+      distributed_executor_backend: "mp"
+      parallel_config:
+        tensor_parallel_size: 2  # same count as the ids listed in runtime.devices
+
+    final_output: true
+    final_output_type: image
+    is_comprehension: false
+    default_sampling_params:
+      seed: 42  # fixed default seed
+
+runtime:
+  enabled: true
+  defaults:
+    window_size: -1  # NOTE(review): -1 looks like a "no limit" sentinel - confirm
+    max_inflight: 1