From 5ecac3de77ecadf4a4f6a5e786c1c1fdea93b4fb Mon Sep 17 00:00:00 2001 From: princepride Date: Thu, 19 Mar 2026 03:25:23 +0000 Subject: [PATCH 1/3] [CI] Split BAGEL tests into dummy/real weight tiers (L2/L3) Add tiered testing for BAGEL model following the Qwen3-Omni pattern: - Pre-merge (test-ready): run with load_format: dummy for fast validation - Post-merge (test-merge): run with real weights for pixel-level accuracy Co-Authored-By: Claude Opus 4.6 Signed-off-by: princepride --- .buildkite/test-merge.yml | 75 ++++++++++--------- .buildkite/test-ready.yml | 6 +- .../stage_configs/bagel_mooncake_ci.yaml | 2 + .../stage_configs/bagel_sharedmemory_ci.yaml | 2 + .../offline_inference/test_bagel_img2img.py | 27 ++++++- .../offline_inference/test_bagel_text2img.py | 34 ++++++++- tests/e2e/online_serving/test_bagel_online.py | 32 +++++++- 7 files changed, 131 insertions(+), 47 deletions(-) diff --git a/.buildkite/test-merge.yml b/.buildkite/test-merge.yml index 5479f8ac1e8..5913ff5747e 100644 --- a/.buildkite/test-merge.yml +++ b/.buildkite/test-merge.yml @@ -272,37 +272,44 @@ steps: path: /mnt/hf-cache type: DirectoryOrCreate - # - label: "Bagel Text2Img Model Test with H100" - # timeout_in_minutes: 30 - # depends_on: upload-merge-pipeline - # commands: - # - export VLLM_WORKER_MULTIPROC_METHOD=spawn - # - pytest -s -v tests/e2e/offline_inference/test_bagel_text2img.py - # agents: - # queue: "mithril-h100-pool" - # plugins: - # - kubernetes: - # podSpec: - # containers: - # - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT - # resources: - # limits: - # nvidia.com/gpu: 1 - # volumeMounts: - # - name: devshm - # mountPath: /dev/shm - # - name: hf-cache - # mountPath: /root/.cache/huggingface - # env: - # - name: HF_HOME - # value: /root/.cache/huggingface - # nodeSelector: - # node.kubernetes.io/instance-type: gpu-h100-sxm - # volumes: - # - name: devshm - # emptyDir: - # medium: Memory - # - name: hf-cache - # hostPath: - # path: /mnt/hf-cache - # type: DirectoryOrCreate + - label: "Bagel Model Test with H100 (Real Weights)" + timeout_in_minutes: 60 + depends_on: upload-merge-pipeline + commands: + - | + timeout 55m bash -c ' + export VLLM_WORKER_MULTIPROC_METHOD=spawn + export VLLM_TEST_CLEAN_GPU_MEMORY=1 + export VLLM_IMAGE_FETCH_TIMEOUT=60 + pytest -s -v tests/e2e/offline_inference/test_bagel_text2img.py -m "advanced_model" --run-level "advanced_model" -k "shared_memory" + pytest -s -v tests/e2e/offline_inference/test_bagel_img2img.py -m "advanced_model" --run-level "advanced_model" + pytest -s -v tests/e2e/online_serving/test_bagel_online.py -m "advanced_model" --run-level "advanced_model" + ' + agents: + queue: "mithril-h100-pool" + plugins: + - kubernetes: + podSpec: + containers: + - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT + resources: + limits: + nvidia.com/gpu: 1 + volumeMounts: + - name: devshm + mountPath: /dev/shm + - name: hf-cache + mountPath: /root/.cache/huggingface + env: + - name: HF_HOME + value: /root/.cache/huggingface + nodeSelector: + node.kubernetes.io/instance-type: gpu-h100-sxm + volumes: + - name: devshm + emptyDir: + medium: Memory + - name: hf-cache + hostPath: + path: /mnt/hf-cache + type: DirectoryOrCreate diff --git a/.buildkite/test-ready.yml b/.buildkite/test-ready.yml index c8579979c84..77fbcc07eaf 100644 --- a/.buildkite/test-ready.yml +++ b/.buildkite/test-ready.yml @@ -268,7 +268,7 @@ steps: timeout 30m bash -c ' export VLLM_WORKER_MULTIPROC_METHOD=spawn export VLLM_TEST_CLEAN_GPU_MEMORY=1 - pytest -s -v tests/e2e/offline_inference/test_bagel_text2img.py + pytest -s -v tests/e2e/offline_inference/test_bagel_text2img.py -m "core_model" --run-level "core_model" ' agents: queue: "mithril-h100-pool" @@ -306,7 +306,7 @@ steps: timeout 30m bash -c ' export VLLM_WORKER_MULTIPROC_METHOD=spawn export VLLM_TEST_CLEAN_GPU_MEMORY=1 - pytest -s -v tests/e2e/offline_inference/test_bagel_img2img.py + pytest -s -v tests/e2e/offline_inference/test_bagel_img2img.py -m "core_model" --run-level "core_model" ' agents: queue: "mithril-h100-pool" @@ -345,7 +345,7 @@ steps: export VLLM_WORKER_MULTIPROC_METHOD=spawn export VLLM_TEST_CLEAN_GPU_MEMORY=1 export VLLM_IMAGE_FETCH_TIMEOUT=60 - pytest -s -v tests/e2e/online_serving/test_bagel_online.py + pytest -s -v tests/e2e/online_serving/test_bagel_online.py -m "core_model" --run-level "core_model" ' agents: queue: "mithril-h100-pool" diff --git a/tests/e2e/offline_inference/stage_configs/bagel_mooncake_ci.yaml b/tests/e2e/offline_inference/stage_configs/bagel_mooncake_ci.yaml index dbb93344b99..7f3a3a6f4e4 100644 --- a/tests/e2e/offline_inference/stage_configs/bagel_mooncake_ci.yaml +++ b/tests/e2e/offline_inference/stage_configs/bagel_mooncake_ci.yaml @@ -21,6 +21,7 @@ stage_args: enable_prefix_caching: false max_num_batched_tokens: 32768 tensor_parallel_size: 1 + load_format: dummy omni_kv_config: need_send_cache: true kv_transfer_criteria: @@ -54,6 +55,7 @@ stage_args: enable_prefix_caching: false max_num_batched_tokens: 32768 tensor_parallel_size: 1 + load_format: dummy omni_kv_config: need_recv_cache: true engine_input_source: [0] diff --git a/tests/e2e/offline_inference/stage_configs/bagel_sharedmemory_ci.yaml b/tests/e2e/offline_inference/stage_configs/bagel_sharedmemory_ci.yaml index 721c50248a0..aa3cc77188a 100644 --- a/tests/e2e/offline_inference/stage_configs/bagel_sharedmemory_ci.yaml +++ b/tests/e2e/offline_inference/stage_configs/bagel_sharedmemory_ci.yaml @@ -21,6 +21,7 @@ stage_args: enable_prefix_caching: false max_num_batched_tokens: 32768 tensor_parallel_size: 1 + load_format: dummy omni_kv_config: need_send_cache: true kv_transfer_criteria: @@ -53,6 +54,7 @@ stage_args: enable_prefix_caching: false max_num_batched_tokens: 32768 tensor_parallel_size: 1 + load_format: dummy omni_kv_config: need_recv_cache: true engine_input_source: [0] diff --git a/tests/e2e/offline_inference/test_bagel_img2img.py b/tests/e2e/offline_inference/test_bagel_img2img.py index da9df2778fa..8c734c6a250 100644 --- a/tests/e2e/offline_inference/test_bagel_img2img.py +++ b/tests/e2e/offline_inference/test_bagel_img2img.py @@ -22,6 +22,7 @@ from PIL import Image from vllm.assets.image import ImageAsset +from tests.conftest import modify_stage_config from tests.utils import hardware_test from vllm_omni.entrypoints.omni import Omni @@ -168,17 +169,39 @@ def _generate_bagel_img2img( return generated_image +def _resolve_stage_config(config_path: str, run_level: str) -> str: + """Resolve stage config based on run level. + + For advanced_model (real weights), strip load_format: dummy so the model + falls back to loading real weights from HuggingFace. + """ + if run_level == "advanced_model": + return modify_stage_config( + config_path, + deletes={ + "stage_args": { + 0: ["engine_args.load_format"], + 1: ["engine_args.load_format"], + } + }, + ) + return config_path + + @pytest.mark.core_model +@pytest.mark.advanced_model @pytest.mark.diffusion @hardware_test(res={"cuda": "H100"}) -def test_bagel_img2img_shared_memory_connector(): +def test_bagel_img2img_shared_memory_connector(run_level): """Test Bagel img2img with shared memory connector.""" input_image = _load_input_image() config_path = str(Path(__file__).parent / "stage_configs" / "bagel_sharedmemory_ci.yaml") + config_path = _resolve_stage_config(config_path, run_level) omni = Omni(model="ByteDance-Seed/BAGEL-7B-MoT", stage_configs_path=config_path, stage_init_timeout=300) try: generated_image = _generate_bagel_img2img(omni, input_image) - _validate_pixels(generated_image) + if run_level == "advanced_model": + _validate_pixels(generated_image) finally: omni.close() diff --git a/tests/e2e/offline_inference/test_bagel_text2img.py b/tests/e2e/offline_inference/test_bagel_text2img.py index 7990ac980e1..ed369aedd92 100644 --- a/tests/e2e/offline_inference/test_bagel_text2img.py +++ b/tests/e2e/offline_inference/test_bagel_text2img.py @@ -28,6 +28,7 @@ import pytest from PIL import Image +from tests.conftest import modify_stage_config from tests.utils import hardware_test from vllm_omni.entrypoints.omni import Omni @@ -158,17 +159,39 @@ def _generate_bagel_image(omni: Omni, prompt: str = DEFAULT_PROMPT) -> Image.Ima return generated_image +def _resolve_stage_config(config_path: str, run_level: str) -> str: + """Resolve stage config based on run level. + + For advanced_model (real weights), strip load_format: dummy so the model + falls back to loading real weights from HuggingFace. + """ + if run_level == "advanced_model": + return modify_stage_config( + config_path, + deletes={ + "stage_args": { + 0: ["engine_args.load_format"], + 1: ["engine_args.load_format"], + } + }, + ) + return config_path + + @pytest.mark.core_model +@pytest.mark.advanced_model @pytest.mark.diffusion @hardware_test(res={"cuda": "H100"}) -def test_bagel_text2img_shared_memory_connector(): +def test_bagel_text2img_shared_memory_connector(run_level): """Test Bagel text2img with shared memory connector.""" config_path = str(Path(__file__).parent / "stage_configs" / "bagel_sharedmemory_ci.yaml") + config_path = _resolve_stage_config(config_path, run_level) omni = Omni(model="ByteDance-Seed/BAGEL-7B-MoT", stage_configs_path=config_path, stage_init_timeout=300) try: generated_image = _generate_bagel_image(omni) - _validate_pixels(generated_image) + if run_level == "advanced_model": + _validate_pixels(generated_image) finally: omni.close() @@ -251,9 +274,10 @@ def _load_mooncake_config(host: str, rpc_port: int, http_port: int) -> str: @pytest.mark.core_model +@pytest.mark.advanced_model @pytest.mark.diffusion @hardware_test(res={"cuda": "H100"}) -def test_bagel_text2img_mooncake_connector(): +def test_bagel_text2img_mooncake_connector(run_level): """Test Bagel text2img with Mooncake connector for inter-stage communication.""" MOONCAKE_HOST = "127.0.0.1" MOONCAKE_RPC_PORT = _find_free_port() @@ -291,10 +315,12 @@ def test_bagel_text2img_mooncake_connector(): http_port=MOONCAKE_HTTP_PORT, ) + temp_config_file = _resolve_stage_config(temp_config_file, run_level) omni = Omni(model="ByteDance-Seed/BAGEL-7B-MoT", stage_configs_path=temp_config_file, stage_init_timeout=300) generated_image = _generate_bagel_image(omni) - _validate_pixels(generated_image) + if run_level == "advanced_model": + _validate_pixels(generated_image) finally: if omni: diff --git a/tests/e2e/online_serving/test_bagel_online.py b/tests/e2e/online_serving/test_bagel_online.py index 4056cfdef6d..06fcad56466 100644 --- a/tests/e2e/online_serving/test_bagel_online.py +++ b/tests/e2e/online_serving/test_bagel_online.py @@ -36,6 +36,7 @@ from PIL import Image from vllm.assets.image import ImageAsset +from tests.conftest import modify_stage_config from tests.utils import hardware_test MODEL = "ByteDance-Seed/BAGEL-7B-MoT" @@ -47,6 +48,25 @@ IMG2IMG_PROMPT = "Change the grass color to red" +def _resolve_stage_config(config_path: str, run_level: str) -> str: + """Resolve stage config based on run level. + + For advanced_model (real weights), strip load_format: dummy so the model + falls back to loading real weights from HuggingFace. + """ + if run_level == "advanced_model": + return modify_stage_config( + config_path, + deletes={ + "stage_args": { + 0: ["engine_args.load_format"], + 1: ["engine_args.load_format"], + } + }, + ) + return config_path + + class BagelOmniServer: """Context manager to start/stop a vLLM-Omni server for Bagel model tests.""" @@ -205,11 +225,13 @@ def _extract_image_from_response(data: dict[str, Any]) -> Image.Image | None: @pytest.mark.core_model +@pytest.mark.advanced_model @pytest.mark.diffusion @hardware_test(res={"cuda": "H100"}) -def test_bagel_text2img_online(): +def test_bagel_text2img_online(run_level): """Test Bagel text2img via OpenAI-compatible chat completions API.""" - with BagelOmniServer() as server: + stage_config = _resolve_stage_config(STAGE_CONFIGS_PATH, run_level) + with BagelOmniServer(stage_configs_path=stage_config) as server: response_data = _send_chat_request( server.base_url, TEXT2IMG_PROMPT, @@ -225,13 +247,15 @@ def test_bagel_text2img_online(): @pytest.mark.core_model +@pytest.mark.advanced_model @pytest.mark.diffusion @hardware_test(res={"cuda": "H100"}) -def test_bagel_img2img_online(): +def test_bagel_img2img_online(run_level): """Test Bagel img2img via OpenAI-compatible chat completions API.""" input_image = ImageAsset("2560px-Gfp-wisconsin-madison-the-nature-boardwalk").pil_image.convert("RGB") - with BagelOmniServer() as server: + stage_config = _resolve_stage_config(STAGE_CONFIGS_PATH, run_level) + with BagelOmniServer(stage_configs_path=stage_config) as server: response_data = _send_chat_request( server.base_url, IMG2IMG_PROMPT, From d4551d9a285e60e430a9e5392127b7d8a8eb7f8a Mon Sep 17 00:00:00 2001 From: princepride Date: Thu, 19 Mar 2026 03:35:26 +0000 Subject: [PATCH 2/3] [CI] Add set -e to BAGEL merge step to fail on first test failure Without set -e, only the exit code of the last pytest command is checked, masking failures in earlier test invocations. Co-Authored-By: Claude Opus 4.6 Signed-off-by: princepride --- .buildkite/test-merge.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.buildkite/test-merge.yml b/.buildkite/test-merge.yml index 5913ff5747e..434ef498812 100644 --- a/.buildkite/test-merge.yml +++ b/.buildkite/test-merge.yml @@ -278,6 +278,7 @@ steps: commands: - | timeout 55m bash -c ' + set -e export VLLM_WORKER_MULTIPROC_METHOD=spawn export VLLM_TEST_CLEAN_GPU_MEMORY=1 export VLLM_IMAGE_FETCH_TIMEOUT=60 From 9d7879a49983bdbc0397601ef9dd724887005a7e Mon Sep 17 00:00:00 2001 From: princepride Date: Thu, 19 Mar 2026 03:55:17 +0000 Subject: [PATCH 3/3] [CI] Refactor BAGEL online test to use omni_server + openai_client fixtures Replace custom BagelOmniServer with shared omni_server fixture and openai_client.send_diffusion_request() to unify code style with Qwen3-Omni tests. Also fix omni_server fixture to dynamically detect stage IDs from config instead of hardcoding stages 0/1/2, so it works for models with any number of stages (e.g., BAGEL has 2, Qwen3-Omni has 3). Co-Authored-By: Claude Opus 4.6 Signed-off-by: princepride --- tests/conftest.py | 13 +- tests/e2e/online_serving/test_bagel_online.py | 272 ++++-------------- 2 files changed, 64 insertions(+), 221 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 2460cfd5bda..a624462b346 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1236,15 +1236,14 @@ def omni_server(request: pytest.FixtureRequest, run_level: str, model_prefix: st port = params.port stage_config_path = params.stage_config_path if run_level == "advanced_model" and stage_config_path is not None: + # Dynamically detect stages from config to avoid KeyError + # for models with fewer stages (e.g., BAGEL has 2, Qwen3-Omni has 3) + with open(stage_config_path, encoding="utf-8") as f: + _cfg = yaml.safe_load(f) or {} + _stage_ids = [s["stage_id"] for s in _cfg.get("stage_args", []) if "stage_id" in s] stage_config_path = modify_stage_config( stage_config_path, - deletes={ - "stage_args": { - 0: ["engine_args.load_format"], - 1: ["engine_args.load_format"], - 2: ["engine_args.load_format"], - } - }, + deletes={"stage_args": {sid: ["engine_args.load_format"] for sid in _stage_ids}}, ) server_args = params.server_args or [] diff --git a/tests/e2e/online_serving/test_bagel_online.py b/tests/e2e/online_serving/test_bagel_online.py index 06fcad56466..a5e26db1ea1 100644 --- a/tests/e2e/online_serving/test_bagel_online.py +++ b/tests/e2e/online_serving/test_bagel_online.py @@ -22,23 +22,18 @@ import base64 import os -import signal -import socket -import subprocess -import sys -import time from io import BytesIO from pathlib import Path -from typing import Any import pytest -import requests -from PIL import Image from vllm.assets.image import ImageAsset -from tests.conftest import modify_stage_config +from tests.conftest import OmniServerParams from tests.utils import hardware_test +os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" +os.environ["VLLM_TEST_CLEAN_GPU_MEMORY"] = "1" + MODEL = "ByteDance-Seed/BAGEL-7B-MoT" STAGE_CONFIGS_PATH = str( Path(__file__).parent.parent / "offline_inference" / "stage_configs" / "bagel_sharedmemory_ci.yaml" @@ -47,225 +42,74 @@ TEXT2IMG_PROMPT = "A cute cat" IMG2IMG_PROMPT = "Change the grass color to red" - -def _resolve_stage_config(config_path: str, run_level: str) -> str: - """Resolve stage config based on run level. - - For advanced_model (real weights), strip load_format: dummy so the model - falls back to loading real weights from HuggingFace. - """ - if run_level == "advanced_model": - return modify_stage_config( - config_path, - deletes={ - "stage_args": { - 0: ["engine_args.load_format"], - 1: ["engine_args.load_format"], - } - }, - ) - return config_path - - -class BagelOmniServer: - """Context manager to start/stop a vLLM-Omni server for Bagel model tests.""" - - def __init__( - self, - model: str = MODEL, - stage_configs_path: str = STAGE_CONFIGS_PATH, - env_dict: dict[str, str] | None = None, - ) -> None: - self.model = model - self.stage_configs_path = stage_configs_path - self.env_dict = env_dict - self.proc: subprocess.Popen | None = None - self.host = "127.0.0.1" - self.port = _find_free_port() - - @property - def base_url(self) -> str: - return f"http://{self.host}:{self.port}" - - def _start_server(self) -> None: - env = os.environ.copy() - if self.env_dict is not None: - env.update(self.env_dict) - - cmd = [ - sys.executable, - "-m", - "vllm_omni.entrypoints.cli.main", - "serve", - self.model, - "--omni", - "--host", - self.host, - "--port", - str(self.port), - "--stage-configs-path", - self.stage_configs_path, - "--stage-init-timeout", - "300", - ] - - self.proc = subprocess.Popen( - cmd, - env=env, - start_new_session=True, - ) - - try: - if not _wait_for_port(self.host, self.port, timeout=600, proc=self.proc): - self.terminate() - raise RuntimeError(f"Server failed to start within 600 seconds on {self.host}:{self.port}") - except Exception: - self.terminate() - raise - - def __enter__(self): - self._start_server() - return self - - def terminate(self) -> None: - if self.proc: - try: - os.killpg(os.getpgid(self.proc.pid), signal.SIGTERM) - except ProcessLookupError: - pass - try: - self.proc.wait(timeout=30) - except subprocess.TimeoutExpired: - try: - os.killpg(os.getpgid(self.proc.pid), signal.SIGKILL) - except ProcessLookupError: - pass - self.proc.wait() - self.proc = None - - def __exit__(self, exc_type, exc_val, exc_tb): - self.terminate() - - -def _find_free_port() -> int: - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.bind(("127.0.0.1", 0)) - s.listen(1) - return s.getsockname()[1] - - -def _wait_for_port(host: str, port: int, timeout: int = 600, proc: subprocess.Popen | None = None) -> bool: - start = time.time() - while time.time() - start < timeout: - if proc is not None and proc.poll() is not None: - # Server process exited early - return False - try: - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: - sock.settimeout(1) - if sock.connect_ex((host, port)) == 0: - return True - except Exception: - pass - time.sleep(2) - return False - - -def _send_chat_request( - server_url: str, - prompt: str, - *, - modality: str = "text2img", - image: Image.Image | None = None, - timeout: int = 300, -) -> dict[str, Any]: - """Send a chat completion request matching the openai_chat_client.py format.""" - content: list[dict[str, Any]] = [{"type": "text", "text": f"<|im_start|>{prompt}<|im_end|>"}] - - if image is not None: - buffer = BytesIO() - image.save(buffer, format="JPEG") - b64_data = base64.b64encode(buffer.getvalue()).decode("utf-8") - content.append( - { - "type": "image_url", - "image_url": {"url": f"data:image/jpeg;base64,{b64_data}"}, - } - ) - - payload: dict[str, Any] = { - "messages": [{"role": "user", "content": content}], - } - - if modality in ("text2img", "img2img"): - payload["modalities"] = ["image"] - - resp = requests.post( - f"{server_url}/v1/chat/completions", - headers={"Content-Type": "application/json"}, - json=payload, - timeout=timeout, - ) - resp.raise_for_status() - return resp.json() - - -def _extract_image_from_response(data: dict[str, Any]) -> Image.Image | None: - """Extract the generated PIL Image from a chat completion response.""" - for choice in data.get("choices", []): - content = choice.get("message", {}).get("content") - if isinstance(content, list) and content: - first_item = content[0] - if isinstance(first_item, dict) and "image_url" in first_item: - url = first_item["image_url"].get("url", "") - if url.startswith("data:image"): - _, b64 = url.split(",", 1) - return Image.open(BytesIO(base64.b64decode(b64))) - return None +# Create parameter combinations for model and stage config +test_params = [ + OmniServerParams( + model=MODEL, + stage_config_path=STAGE_CONFIGS_PATH, + server_args=["--stage-init-timeout", "300"], + ), +] + + +def _build_text2img_messages(prompt: str) -> list[dict]: + """Build OpenAI-format messages for text2img generation.""" + return [ + { + "role": "user", + "content": [{"type": "text", "text": f"<|im_start|>{prompt}<|im_end|>"}], + } + ] + + +def _build_img2img_messages(prompt: str, image_b64: str) -> list[dict]: + """Build OpenAI-format messages for img2img generation.""" + return [ + { + "role": "user", + "content": [ + {"type": "text", "text": f"<|im_start|>{prompt}<|im_end|>"}, + { + "type": "image_url", + "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}, + }, + ], + } + ] @pytest.mark.core_model @pytest.mark.advanced_model @pytest.mark.diffusion @hardware_test(res={"cuda": "H100"}) -def test_bagel_text2img_online(run_level): +@pytest.mark.parametrize("omni_server", test_params, indirect=True) +def test_bagel_text2img_online(omni_server, openai_client) -> None: """Test Bagel text2img via OpenAI-compatible chat completions API.""" - stage_config = _resolve_stage_config(STAGE_CONFIGS_PATH, run_level) - with BagelOmniServer(stage_configs_path=stage_config) as server: - response_data = _send_chat_request( - server.base_url, - TEXT2IMG_PROMPT, - modality="text2img", - ) - - image = _extract_image_from_response(response_data) - assert image is not None, f"No image in response: {response_data}" - image.load() + request_config = { + "model": omni_server.model, + "messages": _build_text2img_messages(TEXT2IMG_PROMPT), + "modalities": ["image"], + } - w, h = image.size - assert w > 0 and h > 0, f"Invalid image size: {image.size}" + openai_client.send_diffusion_request(request_config) @pytest.mark.core_model @pytest.mark.advanced_model @pytest.mark.diffusion @hardware_test(res={"cuda": "H100"}) -def test_bagel_img2img_online(run_level): +@pytest.mark.parametrize("omni_server", test_params, indirect=True) +def test_bagel_img2img_online(omni_server, openai_client) -> None: """Test Bagel img2img via OpenAI-compatible chat completions API.""" input_image = ImageAsset("2560px-Gfp-wisconsin-madison-the-nature-boardwalk").pil_image.convert("RGB") + buffer = BytesIO() + input_image.save(buffer, format="JPEG") + image_b64 = base64.b64encode(buffer.getvalue()).decode("utf-8") + + request_config = { + "model": omni_server.model, + "messages": _build_img2img_messages(IMG2IMG_PROMPT, image_b64), + "modalities": ["image"], + } - stage_config = _resolve_stage_config(STAGE_CONFIGS_PATH, run_level) - with BagelOmniServer(stage_configs_path=stage_config) as server: - response_data = _send_chat_request( - server.base_url, - IMG2IMG_PROMPT, - modality="img2img", - image=input_image, - ) - - image = _extract_image_from_response(response_data) - assert image is not None, f"No image in response: {response_data}" - image.load() - - w, h = image.size - assert w > 0 and h > 0, f"Invalid image size: {image.size}" + openai_client.send_diffusion_request(request_config)