Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 42 additions & 34 deletions .buildkite/test-merge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -272,37 +272,45 @@ steps:
path: /mnt/hf-cache
type: DirectoryOrCreate

# - label: "Bagel Text2Img Model Test with H100"
# timeout_in_minutes: 30
# depends_on: upload-merge-pipeline
# commands:
# - export VLLM_WORKER_MULTIPROC_METHOD=spawn
# - pytest -s -v tests/e2e/offline_inference/test_bagel_text2img.py
# agents:
# queue: "mithril-h100-pool"
# plugins:
# - kubernetes:
# podSpec:
# containers:
# - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
# resources:
# limits:
# nvidia.com/gpu: 1
# volumeMounts:
# - name: devshm
# mountPath: /dev/shm
# - name: hf-cache
# mountPath: /root/.cache/huggingface
# env:
# - name: HF_HOME
# value: /root/.cache/huggingface
# nodeSelector:
# node.kubernetes.io/instance-type: gpu-h100-sxm
# volumes:
# - name: devshm
# emptyDir:
# medium: Memory
# - name: hf-cache
# hostPath:
# path: /mnt/hf-cache
# type: DirectoryOrCreate
- label: "Bagel Model Test with H100 (Real Weights)"
timeout_in_minutes: 60
depends_on: upload-merge-pipeline
commands:
- |
timeout 55m bash -c '
set -e
export VLLM_WORKER_MULTIPROC_METHOD=spawn
export VLLM_TEST_CLEAN_GPU_MEMORY=1
export VLLM_IMAGE_FETCH_TIMEOUT=60
pytest -s -v tests/e2e/offline_inference/test_bagel_text2img.py -m "advanced_model" --run-level "advanced_model" -k "shared_memory"
pytest -s -v tests/e2e/offline_inference/test_bagel_img2img.py -m "advanced_model" --run-level "advanced_model"
pytest -s -v tests/e2e/online_serving/test_bagel_online.py -m "advanced_model" --run-level "advanced_model"
Comment thread
princepride marked this conversation as resolved.
'
agents:
queue: "mithril-h100-pool"
plugins:
- kubernetes:
podSpec:
containers:
- image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
resources:
limits:
nvidia.com/gpu: 1
volumeMounts:
- name: devshm
mountPath: /dev/shm
- name: hf-cache
mountPath: /root/.cache/huggingface
env:
- name: HF_HOME
value: /root/.cache/huggingface
nodeSelector:
node.kubernetes.io/instance-type: gpu-h100-sxm
volumes:
- name: devshm
emptyDir:
medium: Memory
- name: hf-cache
hostPath:
path: /mnt/hf-cache
type: DirectoryOrCreate
6 changes: 3 additions & 3 deletions .buildkite/test-ready.yml
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ steps:
timeout 30m bash -c '
export VLLM_WORKER_MULTIPROC_METHOD=spawn
export VLLM_TEST_CLEAN_GPU_MEMORY=1
pytest -s -v tests/e2e/offline_inference/test_bagel_text2img.py
pytest -s -v tests/e2e/offline_inference/test_bagel_text2img.py -m "core_model" --run-level "core_model"
'
agents:
queue: "mithril-h100-pool"
Expand Down Expand Up @@ -306,7 +306,7 @@ steps:
timeout 30m bash -c '
export VLLM_WORKER_MULTIPROC_METHOD=spawn
export VLLM_TEST_CLEAN_GPU_MEMORY=1
pytest -s -v tests/e2e/offline_inference/test_bagel_img2img.py
pytest -s -v tests/e2e/offline_inference/test_bagel_img2img.py -m "core_model" --run-level "core_model"
'
agents:
queue: "mithril-h100-pool"
Expand Down Expand Up @@ -345,7 +345,7 @@ steps:
export VLLM_WORKER_MULTIPROC_METHOD=spawn
export VLLM_TEST_CLEAN_GPU_MEMORY=1
export VLLM_IMAGE_FETCH_TIMEOUT=60
pytest -s -v tests/e2e/online_serving/test_bagel_online.py
pytest -s -v tests/e2e/online_serving/test_bagel_online.py -m "core_model" --run-level "core_model"
'
agents:
queue: "mithril-h100-pool"
Expand Down
13 changes: 6 additions & 7 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1236,15 +1236,14 @@ def omni_server(request: pytest.FixtureRequest, run_level: str, model_prefix: st
port = params.port
stage_config_path = params.stage_config_path
if run_level == "advanced_model" and stage_config_path is not None:
# Dynamically detect stages from config to avoid KeyError
# for models with fewer stages (e.g., BAGEL has 2, Qwen3-Omni has 3)
with open(stage_config_path, encoding="utf-8") as f:
_cfg = yaml.safe_load(f) or {}
_stage_ids = [s["stage_id"] for s in _cfg.get("stage_args", []) if "stage_id" in s]
stage_config_path = modify_stage_config(
stage_config_path,
deletes={
"stage_args": {
0: ["engine_args.load_format"],
1: ["engine_args.load_format"],
2: ["engine_args.load_format"],
}
},
deletes={"stage_args": {sid: ["engine_args.load_format"] for sid in _stage_ids}},
)

server_args = params.server_args or []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ stage_args:
enable_prefix_caching: false
max_num_batched_tokens: 32768
tensor_parallel_size: 1
load_format: dummy
omni_kv_config:
need_send_cache: true
kv_transfer_criteria:
Expand Down Expand Up @@ -54,6 +55,7 @@ stage_args:
enable_prefix_caching: false
max_num_batched_tokens: 32768
tensor_parallel_size: 1
load_format: dummy
omni_kv_config:
need_recv_cache: true
engine_input_source: [0]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ stage_args:
enable_prefix_caching: false
max_num_batched_tokens: 32768
tensor_parallel_size: 1
load_format: dummy
omni_kv_config:
need_send_cache: true
kv_transfer_criteria:
Expand Down Expand Up @@ -53,6 +54,7 @@ stage_args:
enable_prefix_caching: false
max_num_batched_tokens: 32768
tensor_parallel_size: 1
load_format: dummy
omni_kv_config:
need_recv_cache: true
engine_input_source: [0]
Expand Down
27 changes: 25 additions & 2 deletions tests/e2e/offline_inference/test_bagel_img2img.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from PIL import Image
from vllm.assets.image import ImageAsset

from tests.conftest import modify_stage_config
from tests.utils import hardware_test
from vllm_omni.entrypoints.omni import Omni

Expand Down Expand Up @@ -168,17 +169,39 @@ def _generate_bagel_img2img(
return generated_image


def _resolve_stage_config(config_path: str, run_level: str) -> str:
"""Resolve stage config based on run level.

For advanced_model (real weights), strip load_format: dummy so the model
falls back to loading real weights from HuggingFace.
"""
if run_level == "advanced_model":
return modify_stage_config(
config_path,
deletes={
"stage_args": {
0: ["engine_args.load_format"],
1: ["engine_args.load_format"],
}
},
)
return config_path


@pytest.mark.core_model
@pytest.mark.advanced_model
@pytest.mark.diffusion
@hardware_test(res={"cuda": "H100"})
def test_bagel_img2img_shared_memory_connector():
def test_bagel_img2img_shared_memory_connector(run_level):
"""Test Bagel img2img with shared memory connector."""
input_image = _load_input_image()
config_path = str(Path(__file__).parent / "stage_configs" / "bagel_sharedmemory_ci.yaml")
config_path = _resolve_stage_config(config_path, run_level)
omni = Omni(model="ByteDance-Seed/BAGEL-7B-MoT", stage_configs_path=config_path, stage_init_timeout=300)

try:
generated_image = _generate_bagel_img2img(omni, input_image)
_validate_pixels(generated_image)
if run_level == "advanced_model":
_validate_pixels(generated_image)
finally:
omni.close()
34 changes: 30 additions & 4 deletions tests/e2e/offline_inference/test_bagel_text2img.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import pytest
from PIL import Image

from tests.conftest import modify_stage_config
from tests.utils import hardware_test
from vllm_omni.entrypoints.omni import Omni

Expand Down Expand Up @@ -158,17 +159,39 @@ def _generate_bagel_image(omni: Omni, prompt: str = DEFAULT_PROMPT) -> Image.Ima
return generated_image


def _resolve_stage_config(config_path: str, run_level: str) -> str:
"""Resolve stage config based on run level.

For advanced_model (real weights), strip load_format: dummy so the model
falls back to loading real weights from HuggingFace.
"""
if run_level == "advanced_model":
return modify_stage_config(
config_path,
deletes={
"stage_args": {
0: ["engine_args.load_format"],
1: ["engine_args.load_format"],
}
},
)
return config_path


@pytest.mark.core_model
@pytest.mark.advanced_model
@pytest.mark.diffusion
@hardware_test(res={"cuda": "H100"})
def test_bagel_text2img_shared_memory_connector():
def test_bagel_text2img_shared_memory_connector(run_level):
"""Test Bagel text2img with shared memory connector."""
config_path = str(Path(__file__).parent / "stage_configs" / "bagel_sharedmemory_ci.yaml")
config_path = _resolve_stage_config(config_path, run_level)
omni = Omni(model="ByteDance-Seed/BAGEL-7B-MoT", stage_configs_path=config_path, stage_init_timeout=300)

try:
generated_image = _generate_bagel_image(omni)
_validate_pixels(generated_image)
if run_level == "advanced_model":
_validate_pixels(generated_image)
finally:
omni.close()

Expand Down Expand Up @@ -251,9 +274,10 @@ def _load_mooncake_config(host: str, rpc_port: int, http_port: int) -> str:


@pytest.mark.core_model
@pytest.mark.advanced_model
@pytest.mark.diffusion
@hardware_test(res={"cuda": "H100"})
def test_bagel_text2img_mooncake_connector():
def test_bagel_text2img_mooncake_connector(run_level):
"""Test Bagel text2img with Mooncake connector for inter-stage communication."""
MOONCAKE_HOST = "127.0.0.1"
MOONCAKE_RPC_PORT = _find_free_port()
Expand Down Expand Up @@ -291,10 +315,12 @@ def test_bagel_text2img_mooncake_connector():
http_port=MOONCAKE_HTTP_PORT,
)

temp_config_file = _resolve_stage_config(temp_config_file, run_level)
omni = Omni(model="ByteDance-Seed/BAGEL-7B-MoT", stage_configs_path=temp_config_file, stage_init_timeout=300)

generated_image = _generate_bagel_image(omni)
_validate_pixels(generated_image)
if run_level == "advanced_model":
_validate_pixels(generated_image)

finally:
if omni:
Expand Down
Loading
Loading