From 9aa10d343c1975fef81b6c18f536955f191aa2b9 Mon Sep 17 00:00:00 2001
From: TangPeng <85704592@qq.com>
Date: Mon, 12 Jan 2026 14:40:05 +0800
Subject: [PATCH 1/2] enable tensor_parallel_size argument with online serving cmd

Signed-off-by: TangPeng <85704592@qq.com>
---
 vllm_omni/entrypoints/async_omni.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/vllm_omni/entrypoints/async_omni.py b/vllm_omni/entrypoints/async_omni.py
index 1cd7f506319..0067c0af720 100644
--- a/vllm_omni/entrypoints/async_omni.py
+++ b/vllm_omni/entrypoints/async_omni.py
@@ -131,15 +131,16 @@ def _create_default_diffusion_stage_cfg(self, kwargs: dict[str, Any]) -> dict[st
         ulysses_degree = kwargs.get("ulysses_degree") or 1
         ring_degree = kwargs.get("ring_degree") or 1
         sequence_parallel_size = kwargs.get("sequence_parallel_size")
+        tensor_parallel_size = kwargs.get("tensor_parallel_size") or 1
         if sequence_parallel_size is None:
             sequence_parallel_size = ulysses_degree * ring_degree
-        num_devices = sequence_parallel_size
+        num_devices = sequence_parallel_size * tensor_parallel_size
         for i in range(1, num_devices):
             devices += f",{i}"
         parallel_config = DiffusionParallelConfig(
             pipeline_parallel_size=1,
             data_parallel_size=1,
-            tensor_parallel_size=1,
+            tensor_parallel_size=tensor_parallel_size,
             sequence_parallel_size=sequence_parallel_size,
             ulysses_degree=ulysses_degree,
             ring_degree=ring_degree,

From 0be20fe690807291b8ee9015f1f513d7fb0e3107 Mon Sep 17 00:00:00 2001
From: TangPeng <85704592@qq.com>
Date: Tue, 13 Jan 2026 14:17:29 +0800
Subject: [PATCH 2/2] enable enforce_eager argument with online serving cmd

Signed-off-by: TangPeng <85704592@qq.com>
---
 vllm_omni/entrypoints/async_omni.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vllm_omni/entrypoints/async_omni.py b/vllm_omni/entrypoints/async_omni.py
index 0067c0af720..99f872db7f2 100644
--- a/vllm_omni/entrypoints/async_omni.py
+++ b/vllm_omni/entrypoints/async_omni.py
@@ -162,6 +162,7 @@ def _create_default_diffusion_stage_cfg(self, kwargs: dict[str, Any]) -> dict[st
                     "cache_backend": cache_backend,
                     "cache_config": cache_config,
                     "enable_cpu_offload": kwargs.get("enable_cpu_offload", False),
+                    "enforce_eager": kwargs.get("enforce_eager", False),
                 },
                 "final_output": True,
                 "final_output_type": "image",