Skip to content
4 changes: 2 additions & 2 deletions .buildkite/test-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ steps:
- label: ":full_moon: Diffusion Model Test with H100"
timeout_in_minutes: 60
depends_on: upload-nightly-pipeline
# if: build.env("NIGHTLY") == "1"
if: build.env("NIGHTLY") == "1"
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -s -v tests/e2e/online_serving/test_*_expansion.py -m "advanced_model and diffusion and H100" --run-level "advanced_model"
Expand Down Expand Up @@ -185,7 +185,7 @@ steps:
- label: ":email: Nightly Collection & Email"
key: nightly-perf-distribution
depends_on:
- nightly-performance
- nightly-performance
if: build.env("NIGHTLY") == "1"
commands:
- pip install openpyxl
Expand Down
16 changes: 14 additions & 2 deletions vllm_omni/entrypoints/openai/serving_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,13 @@ async def create_chat_completion(
if not extracted_prompt:
return self.create_error_response("No text prompt found in messages")

extra_body = getattr(request, "extra_body", None) or {}
# [NOTE] When a request is sent via the OpenAI Python client library,
# `extra_body` is flattened and merged into the payload's root.
# These extra fields are accessible via the `model_extra` property (from the Pydantic base class).
# When a raw request is sent with curl, no flattening happens, so read the `extra_body` dict directly.
extra_body = getattr(request, "extra_body", None)
if not extra_body:
extra_body = request.model_extra or {}
height = extra_body.get("height")
width = extra_body.get("width")
if "size" in extra_body:
Expand Down Expand Up @@ -2036,7 +2042,13 @@ async def _create_diffusion_chat_completion(

# Extract generation parameters from extra_body (preferred)
# Reference: text_to_image.py and text_to_video.py for supported parameters
extra_body = getattr(request, "extra_body", None) or {}
# [NOTE] When a request is sent via the OpenAI Python client library,
# `extra_body` is flattened and merged into the payload's root.
# These extra fields are accessible via the `model_extra` property (from the Pydantic base class).
# When a raw request is sent with curl, no flattening happens, so read the `extra_body` dict directly.
extra_body = getattr(request, "extra_body", None)
if not extra_body:
extra_body = request.model_extra or {}

# Parse size if provided (supports "1024x1024" format)
height = extra_body.get("height")
Expand Down
Loading