diff --git a/.buildkite/test-nightly.yml b/.buildkite/test-nightly.yml index ecb910434bc..ad71e061fbb 100644 --- a/.buildkite/test-nightly.yml +++ b/.buildkite/test-nightly.yml @@ -73,7 +73,7 @@ steps: - label: ":full_moon: Diffusion Model Test with H100" timeout_in_minutes: 60 depends_on: upload-nightly-pipeline - # if: build.env("NIGHTLY") == "1" + if: build.env("NIGHTLY") == "1" commands: - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pytest -s -v tests/e2e/online_serving/test_*_expansion.py -m "advanced_model and diffusion and H100" --run-level "advanced_model" @@ -185,7 +185,7 @@ steps: - label: ":email: Nightly Collection & Email" key: nightly-perf-distribution depends_on: - - nightly-performance + - nightly-performance if: build.env("NIGHTLY") == "1" commands: - pip install openpyxl diff --git a/vllm_omni/entrypoints/openai/serving_chat.py b/vllm_omni/entrypoints/openai/serving_chat.py index a60756f2ca6..d5eee3933c7 100644 --- a/vllm_omni/entrypoints/openai/serving_chat.py +++ b/vllm_omni/entrypoints/openai/serving_chat.py @@ -297,7 +297,13 @@ async def create_chat_completion( if not extracted_prompt: return self.create_error_response("No text prompt found in messages") - extra_body = getattr(request, "extra_body", None) or {} + # [NOTE] When sending request via openai client Python library, + # `extra_body` is flattened and merged into the payload's root. + # These extra fields are accessible via `model_extra` property (from Pydantic base class). + # When sending raw request with curl, no flattening happens. Directly read the `extra_body` dict. 
+ extra_body = getattr(request, "extra_body", None) + if not extra_body: + extra_body = request.model_extra or {} height = extra_body.get("height") width = extra_body.get("width") if "size" in extra_body: @@ -2036,7 +2042,13 @@ async def _create_diffusion_chat_completion( # Extract generation parameters from extra_body (preferred) # Reference: text_to_image.py and text_to_video.py for supported parameters - extra_body = getattr(request, "extra_body", None) or {} + # [NOTE] When sending request via openai client Python library, + # `extra_body` is flattened and merged into the payload's root. + # These extra fields are accessible via `model_extra` property (from Pydantic base class). + # When sending raw request with curl, no flattening happens. Directly read the `extra_body` dict. + extra_body = getattr(request, "extra_body", None) + if not extra_body: + extra_body = request.model_extra or {} # Parse size if provided (supports "1024x1024" format) height = extra_body.get("height")