Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
26a4e8d
add time cost log for different stages
SamitHuang Mar 6, 2026
b3b70a8
reduce hop3 overhead
SamitHuang Mar 6, 2026
104d71c
perf: reduce IPC overhead for single-stage diffusion serving
SamitHuang Mar 6, 2026
bf2ddb0
perf: reduce IPC overhead for single-stage diffusion serving (~6.5s, …
SamitHuang Mar 6, 2026
735b2ca
Merge branch 'main' into main
SamitHuang Mar 6, 2026
dd4468c
fix conflicts
SamitHuang Mar 6, 2026
ff62a1e
rm redundancy
SamitHuang Mar 9, 2026
870963e
Merge branch 'main' into main
SamitHuang Mar 9, 2026
5414a42
rm logs
SamitHuang Mar 9, 2026
e3dec54
fix inline
SamitHuang Mar 9, 2026
2cd9f9f
fix ci
SamitHuang Mar 9, 2026
172040a
fix ci
SamitHuang Mar 9, 2026
0a86fc5
fix log
SamitHuang Mar 9, 2026
9b9c597
fix
SamitHuang Mar 9, 2026
bda0f2d
fix log
SamitHuang Mar 9, 2026
3452ad3
Merge branch 'main' of https://github.com/samithuang/vllm-omni
SamitHuang Mar 9, 2026
9ab7c55
Merge remote-tracking branch 'upstream/main'
SamitHuang Mar 12, 2026
c32a78a
Merge remote-tracking branch 'upstream/main'
SamitHuang Mar 12, 2026
91afe27
Merge branch 'main' of https://github.com/samithuang/vllm-omni
SamitHuang Mar 13, 2026
9b783a6
Merge branch 'main' of https://github.com/samithuang/vllm-omni
SamitHuang Mar 16, 2026
cf286ec
Merge branch 'main' of https://github.com/vllm-project/vllm-omni
SamitHuang Mar 18, 2026
3f2beee
Merge branch 'main' of https://github.com/samithuang/vllm-omni
SamitHuang Mar 18, 2026
140ef4a
[Bugfix] Fix config misalignment between offline and online diffusion…
SamitHuang Mar 18, 2026
979a890
Merge remote-tracking branch 'origin/main' into fix/diffusion-config-…
SamitHuang Mar 19, 2026
63b1715
fix true cfg scale parsing
SamitHuang Mar 19, 2026
15c3eb1
Merge branch 'main' into fix/diffusion-config-alignment-offline-online
hsliuustc0106 Mar 19, 2026
f3c21ab
[Bugfix] Convert RGB input to RGBA for Qwen-Image-Layered pipeline
SamitHuang Mar 19, 2026
6026495
Merge branch 'fix/diffusion-config-alignment-offline-online' of https…
SamitHuang Mar 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@ def pre_process_func(
else:
image = cast(PIL.Image.Image | torch.Tensor | np.ndarray, raw_image)

if isinstance(image, PIL.Image.Image) and image.mode != "RGBA":
image = image.convert("RGBA")
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lgtm


# 1. calculate dimensions
image_size = image.size
assert request.sampling_params.resolution in [640, 1024], (
Expand Down Expand Up @@ -652,6 +655,8 @@ def forward(
width = req.sampling_params.width
else:
# fallback to run pre-processing in pipeline (debug only)
if isinstance(image, PIL.Image.Image) and image.mode != "RGBA":
image = image.convert("RGBA")
image_size = image[0].size if isinstance(image, list) else image.size
assert resolution in [640, 1024], f"resolution must be either 640 or 1024, but got {resolution}"
calculated_width, calculated_height = calculate_dimensions(
Expand Down
20 changes: 13 additions & 7 deletions vllm_omni/diffusion/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,23 @@ class OmniDiffusionRequest:

def __post_init__(self):
"""Initialize dependent fields after dataclass initialization."""
# Detect whether user explicitly provided guidance_scale.
# The sentinel default is 0.0 (false-like); any truthy value means
# the caller set it intentionally. We must resolve this BEFORE
# auto-filling guidance_scale_2, otherwise the sentinel leaks into
# guidance_scale_2.
if self.sampling_params.guidance_scale:
self.sampling_params.guidance_scale_provided = True
else:
self.sampling_params.guidance_scale = 1.0

# Set do_classifier_free_guidance based on guidance scale and negative prompt
if self.sampling_params.guidance_scale > 1.0 and any(
(not isinstance(p, str) and p.get("negative_prompt")) for p in self.prompts
):
self.sampling_params.do_classifier_free_guidance = True

# Auto-fill guidance_scale_2 from the (now-resolved) guidance_scale
# so downstream code always has a valid value.
if self.sampling_params.guidance_scale_2 is None:
self.sampling_params.guidance_scale_2 = self.sampling_params.guidance_scale

# The dataclass default value is 0 (false-like), used to detect whether user explicitly provides this value
# After this check is done, reset this value to old default 1
if self.sampling_params.guidance_scale:
self.sampling_params.guidance_scale_provided = True
else:
self.sampling_params.guidance_scale = 1.0
1 change: 1 addition & 0 deletions vllm_omni/diffusion/worker/diffusion_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ def execute_model(self, req: OmniDiffusionRequest) -> DiffusionOutput:
not getattr(req, "skip_cache_refresh", False)
and self.cache_backend is not None
and self.cache_backend.is_enabled()
and req.sampling_params.num_inference_steps is not None
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[P1] Always refresh cache state even when steps are omitted

The new `num_inference_steps is not None` gate prevents `cache_backend.refresh(...)` from running for requests that rely on pipeline defaults (now common after this commit). That leaves per-request cache state stale across generations: for example, `TeaCacheBackend.refresh` is the hook-reset path and is documented and implemented as required before each generation. With the cache enabled and the step count omitted, subsequent requests can reuse prior residual/counter state and produce incorrect outputs.

Useful? React with 👍 / 👎.

):
self.cache_backend.refresh(self.pipeline, req.sampling_params.num_inference_steps)

Expand Down
3 changes: 0 additions & 3 deletions vllm_omni/entrypoints/async_omni_diffusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,9 +172,6 @@ async def generate(
if request_id is None:
request_id = f"diff-{uuid.uuid4().hex[:16]}"

if sampling_params.guidance_scale:
sampling_params.guidance_scale_provided = True

if lora_request is not None:
sampling_params.lora_request = lora_request

Expand Down
28 changes: 18 additions & 10 deletions vllm_omni/entrypoints/openai/serving_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -2050,20 +2050,26 @@ async def _create_diffusion_chat_completion(
except ValueError:
logger.warning("Invalid size format: %s", extra_body.get("size"))

# Get request parameters from extra_body
# Text-to-image parameters (ref: text_to_image.py)
num_inference_steps = extra_body.get("num_inference_steps", 50)
# Get request parameters from extra_body.
# Avoid hardcoded defaults here — let each pipeline's forward()
# method apply its own model-specific default when the user does
# not provide a value.
num_inference_steps = extra_body.get("num_inference_steps")
guidance_scale = extra_body.get("guidance_scale")
true_cfg_scale = extra_body.get("true_cfg_scale") # Qwen-Image specific
true_cfg_scale = extra_body.get("true_cfg_scale") or extra_body.get("cfg_scale")
seed = extra_body.get("seed")
negative_prompt = extra_body.get("negative_prompt")
num_outputs_per_prompt = extra_body.get("num_outputs_per_prompt", 1)

# Text-to-video parameters (ref: text_to_video.py)
num_frames = extra_body.get("num_frames")
guidance_scale_2 = extra_body.get("guidance_scale_2") # For video high-noise CFG
guidance_scale_2 = extra_body.get("guidance_scale_2")
lora_body = extra_body.get("lora")

# Qwen-Image-Layered parameters
layers = extra_body.get("layers")
resolution = extra_body.get("resolution")

logger.info(
"Diffusion chat request %s: prompt=%r, ref_images=%d, params=%s",
request_id,
Expand All @@ -2087,25 +2093,27 @@ async def _create_diffusion_chat_completion(
"negative_prompt": negative_prompt,
}
gen_params = OmniDiffusionSamplingParams(
num_inference_steps=num_inference_steps,
height=height,
width=width,
num_outputs_per_prompt=num_outputs_per_prompt,
seed=seed,
)

# Only override defaults when the user explicitly provides values
if num_inference_steps is not None:
gen_params.num_inference_steps = num_inference_steps
if guidance_scale is not None:
gen_params.guidance_scale = guidance_scale

# Add Qwen-Image specific parameter
if true_cfg_scale is not None:
gen_params.true_cfg_scale = true_cfg_scale

# Add video generation parameters if set
if num_frames is not None:
gen_params.num_frames = num_frames
if guidance_scale_2 is not None:
gen_params.guidance_scale_2 = guidance_scale_2
if layers is not None:
gen_params.layers = layers
if resolution is not None:
gen_params.resolution = resolution

# Parse per-request LoRA (works for both AsyncOmniDiffusion and AsyncOmni).
if lora_body and isinstance(lora_body, dict):
Expand Down
5 changes: 3 additions & 2 deletions vllm_omni/inputs/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,8 +234,9 @@ class OmniDiffusionSamplingParams:
step_index: int | None = None
boundary_ratio: float | None = None

# Scheduler parameters
num_inference_steps: int = 50
# Scheduler parameters – ``None`` means "not explicitly set by the caller";
# each pipeline's ``forward()`` decides its own model-specific default.
num_inference_steps: int | None = None
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[P1] Restore a concrete default for `num_inference_steps`

Changing `OmniDiffusionSamplingParams.num_inference_steps` to `None` breaks pipelines that still require an explicit step count. In `DreamIDOmniPipeline.forward`, the value is read directly and passed into `get_scheduler_time_steps` without a fallback, and `FlowUniPCMultistepScheduler.set_timesteps` asserts that `num_inference_steps` is not `None`; requests that omit this field now fail at runtime instead of using the previous default behavior.

Useful? React with 👍 / 👎.

guidance_scale: float = 0.0
guidance_scale_provided: bool = False
guidance_scale_2: float | None = None
Expand Down
Loading