Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .buildkite/test-merge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ steps:
- "/fsx/hf_cache:/fsx/hf_cache"

- label: "Diffusion Sequence Parallelism Test"
timeout_in_minutes: 20
timeout_in_minutes: 25
depends_on: upload-merge-pipeline
commands:
- pytest -s -v tests/e2e/offline_inference/test_sequence_parallel.py tests/diffusion/distributed/test_ulysses_uaa_perf.py
Expand Down
3 changes: 1 addition & 2 deletions .buildkite/test-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,6 @@ steps:
- export VLLM_ALLOW_LONG_MAX_MODEL_LEN="1"
- pytest -s -v tests/dfx/perf/scripts/run_benchmark.py
- buildkite-agent artifact upload "tests/dfx/perf/results/*.json"
- buildkite-agent artifact upload "tests/dfx/perf/results/*.html"
agents:
queue: "mithril-h100-pool"
plugins:
Expand Down Expand Up @@ -244,7 +243,7 @@ steps:
- export DEFAULT_OUTPUT_DIR=tests/dfx/perf/results
- buildkite-agent artifact download "tests/dfx/perf/results/*.json" . --step nightly-omni-performance
- buildkite-agent artifact download "tests/dfx/perf/results/*.json" . --step nightly-qwen-image-performance
- buildkite-agent artifact download "tests/dfx/perf/results/*.html" . --step nightly-omni-performance
- buildkite-agent artifact download "tests/dfx/perf/results/*.html" . --step nightly-testcase-statistics
- python tools/nightly/generate_nightly_perf_excel.py
- python tools/nightly/generate_nightly_perf_html.py
- python tools/nightly/send_nightly_email.py --report-file "tests/dfx/perf/results/*.xlsx, tests/dfx/perf/results/*.html"
Expand Down
8 changes: 6 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1771,8 +1771,12 @@ def omni_server(request: pytest.FixtureRequest, run_level: str, model_prefix: st
server_args = params.server_args or []
if params.use_omni and params.stage_init_timeout is not None:
server_args = [*server_args, "--stage-init-timeout", str(params.stage_init_timeout)]
else:
server_args = [*server_args, "--stage-init-timeout", "600"]
if params.init_timeout is not None:
server_args = [*server_args, "--init-timeout", str(params.init_timeout)]
else:
server_args = [*server_args, "--init-timeout", "900"]
if params.use_stage_cli:
if not params.use_omni:
raise ValueError("omni_server with use_stage_cli=True requires use_omni=True")
Expand Down Expand Up @@ -2870,9 +2874,9 @@ def __init__(
self,
model_name: str,
seed: int = 42,
stage_init_timeout: int = 300,
stage_init_timeout: int = 600,
batch_timeout: int = 10,
init_timeout: int = 300,
init_timeout: int = 900,
shm_threshold_bytes: int = 65536,
log_stats: bool = False,
stage_configs_path: str | None = None,
Expand Down
15 changes: 7 additions & 8 deletions tests/e2e/offline_inference/test_bagel_img2img.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
from PIL import Image
from vllm.assets.image import ImageAsset

from tests.conftest import modify_stage_config
from tests.conftest import OmniRunner, modify_stage_config
from tests.utils import hardware_test
from vllm_omni.entrypoints.omni import Omni
from vllm_omni import Omni
from vllm_omni.platforms import current_omni_platform

# Reference pixel data extracted from the known-good output image
Expand Down Expand Up @@ -210,11 +210,10 @@ def test_bagel_img2img_shared_memory_connector(run_level):
input_image = _load_input_image()
config_path = str(Path(__file__).parent / "stage_configs" / "bagel_sharedmemory_ci.yaml")
config_path = _resolve_stage_config(config_path, run_level)
omni = Omni(model="ByteDance-Seed/BAGEL-7B-MoT", stage_configs_path=config_path, stage_init_timeout=300)

try:
generated_image = _generate_bagel_img2img(omni, input_image)
with OmniRunner(
"ByteDance-Seed/BAGEL-7B-MoT",
stage_configs_path=config_path,
) as runner:
generated_image = _generate_bagel_img2img(runner.omni, input_image)
if run_level == "advanced_model":
_validate_pixels(generated_image)
finally:
omni.close()
11 changes: 4 additions & 7 deletions tests/e2e/offline_inference/test_bagel_lora.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
from vllm_omni.outputs import OmniRequestOutput

os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
os.environ["VLLM_TEST_CLEAN_GPU_MEMORY"] = "1"

from pathlib import Path

Expand All @@ -32,9 +31,9 @@
from PIL import Image
from safetensors.torch import save_file

from tests.conftest import modify_stage_config
from tests.conftest import OmniRunner, modify_stage_config
from tests.utils import hardware_test
from vllm_omni.entrypoints.omni import Omni
from vllm_omni import Omni
from vllm_omni.lora.request import LoRARequest
from vllm_omni.lora.utils import stable_lora_int_id

Expand Down Expand Up @@ -154,8 +153,8 @@ def _make_file_lora_request(adapter_dir: Path) -> LoRARequest:
def test_bagel_lora_scale_and_deactivation(run_level, tmp_path):
"""Validate LoRA effect, bounded perturbation, and clean deactivation."""
config_path = _resolve_stage_config(BAGEL_STAGE_CONFIG, run_level)
omni = Omni(model=MODEL, stage_configs_path=config_path, stage_init_timeout=300)
try:
with OmniRunner(MODEL, stage_configs_path=config_path) as runner:
omni = runner.omni
lora_request = _make_file_lora_request(tmp_path / "bagel_lora")

# 1) Baseline (no LoRA)
Expand Down Expand Up @@ -194,5 +193,3 @@ def test_bagel_lora_scale_and_deactivation(run_level, tmp_path):

# (d) Deactivation fully restores base model
assert diff_restored == 0.0, f"Base model not restored after LoRA deactivation: diff={diff_restored}"
finally:
omni.close()
32 changes: 15 additions & 17 deletions tests/e2e/offline_inference/test_bagel_text2img.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import os

os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
os.environ["VLLM_TEST_CLEAN_GPU_MEMORY"] = "1"
import signal
import socket
import subprocess
Expand All @@ -28,9 +27,9 @@
import pytest
from PIL import Image

from tests.conftest import modify_stage_config
from tests.conftest import OmniRunner, modify_stage_config
from tests.utils import hardware_test
from vllm_omni.entrypoints.omni import Omni
from vllm_omni import Omni
from vllm_omni.platforms import current_omni_platform

# Reference pixel data extracted from the known-good output image
Expand Down Expand Up @@ -199,14 +198,13 @@ def test_bagel_text2img_shared_memory_connector(run_level):
"""Test Bagel text2img with shared memory connector."""
config_path = str(Path(__file__).parent / "stage_configs" / "bagel_sharedmemory_ci.yaml")
config_path = _resolve_stage_config(config_path, run_level)
omni = Omni(model="ByteDance-Seed/BAGEL-7B-MoT", stage_configs_path=config_path, stage_init_timeout=300)

try:
generated_image = _generate_bagel_image(omni)
with OmniRunner(
"ByteDance-Seed/BAGEL-7B-MoT",
stage_configs_path=config_path,
) as runner:
generated_image = _generate_bagel_image(runner.omni)
if run_level == "advanced_model":
_validate_pixels(generated_image)
finally:
omni.close()


def _wait_for_port(host: str, port: int, timeout: int = 30) -> bool:
Expand Down Expand Up @@ -319,7 +317,6 @@ def test_bagel_text2img_mooncake_connector(run_level):

mooncake_master_proc = None
temp_config_file = None
omni = None

try:
_cleanup_mooncake_processes()
Expand Down Expand Up @@ -349,15 +346,16 @@ def test_bagel_text2img_mooncake_connector(run_level):
)

temp_config_file = _resolve_stage_config(temp_config_file, run_level)
omni = Omni(model="ByteDance-Seed/BAGEL-7B-MoT", stage_configs_path=temp_config_file, stage_init_timeout=300)

generated_image = _generate_bagel_image(omni)
if run_level == "advanced_model":
_validate_pixels(generated_image)
with OmniRunner(
"ByteDance-Seed/BAGEL-7B-MoT",
stage_configs_path=temp_config_file,
stage_init_timeout=300,
) as runner:
generated_image = _generate_bagel_image(runner.omni)
if run_level == "advanced_model":
_validate_pixels(generated_image)

finally:
if omni:
omni.close()
if temp_config_file:
try:
os.unlink(temp_config_file)
Expand Down
27 changes: 9 additions & 18 deletions tests/e2e/offline_inference/test_bagel_understanding.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,13 @@
import os

os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
os.environ["VLLM_TEST_CLEAN_GPU_MEMORY"] = "1"
from pathlib import Path

import pytest
from vllm.assets.image import ImageAsset

from tests.conftest import modify_stage_config
from tests.conftest import OmniRunner, modify_stage_config
from tests.utils import hardware_test
from vllm_omni.entrypoints.omni import Omni

MODEL_NAME = "ByteDance-Seed/BAGEL-7B-MoT"
STAGE_CONFIG = str(Path(__file__).parent / "stage_configs" / "bagel_sharedmemory_ci.yaml")
Expand Down Expand Up @@ -76,13 +74,11 @@ def _extract_text(omni_outputs: list) -> str:
def test_bagel_text2text(run_level):
"""Test Bagel text2text produces correct text output."""
config_path = _resolve_stage_config(STAGE_CONFIG, run_level)
omni = Omni(
model=MODEL_NAME,
with OmniRunner(
MODEL_NAME,
stage_configs_path=config_path,
stage_init_timeout=300,
)

try:
) as runner:
omni = runner.omni
prompt = "<|im_start|>user\nWhere is the capital of France?<|im_end|>\n<|im_start|>assistant\n"
params_list = omni.default_sampling_params_list
omni_outputs = list(
Expand All @@ -100,8 +96,6 @@ def test_bagel_text2text(run_level):
assert text == REFERENCE_TEXT_TEXT2TEXT, (
f"Text mismatch: expected {REFERENCE_TEXT_TEXT2TEXT!r}, got {text!r}"
)
finally:
omni.close()


@pytest.mark.core_model
Expand All @@ -112,13 +106,12 @@ def test_bagel_img2text(run_level):
"""Test Bagel img2text produces correct text output."""
input_image = ImageAsset("2560px-Gfp-wisconsin-madison-the-nature-boardwalk").pil_image.convert("RGB")
config_path = _resolve_stage_config(STAGE_CONFIG, run_level)
omni = Omni(
model=MODEL_NAME,
with OmniRunner(
MODEL_NAME,
stage_configs_path=config_path,
stage_init_timeout=300,
)

try:
) as runner:
omni = runner.omni
prompt = "<|im_start|>user\n<|image_pad|>\nPlease describe this image<|im_end|>\n<|im_start|>assistant\n"
params_list = omni.default_sampling_params_list
omni_outputs = list(
Expand All @@ -140,5 +133,3 @@ def test_bagel_img2text(run_level):

if run_level == "advanced_model":
assert text == REFERENCE_TEXT_IMG2TEXT, f"Text mismatch: expected {REFERENCE_TEXT_IMG2TEXT!r}, got {text!r}"
finally:
omni.close()
35 changes: 7 additions & 28 deletions tests/e2e/offline_inference/test_cache_dit.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,15 @@
It uses minimal settings to keep test time short for CI.
"""

import os
import sys
from pathlib import Path

import pytest
import torch

from tests.conftest import OmniRunner
from tests.utils import hardware_test
from vllm_omni.inputs.data import OmniDiffusionSamplingParams

# ruff: noqa: E402
REPO_ROOT = Path(__file__).resolve().parents[2]
if str(REPO_ROOT) not in sys.path:
sys.path.insert(0, str(REPO_ROOT))

from vllm_omni import Omni
from vllm_omni.outputs import OmniRequestOutput
from vllm_omni.platforms import current_omni_platform

os.environ["VLLM_TEST_CLEAN_GPU_MEMORY"] = "1"

# Use random weights model for testing
models = ["riverclouds/qwen_image_random"]

Expand All @@ -48,20 +36,17 @@ def test_cache_dit(model_name: str):
"residual_diff_threshold": 0.24,
"max_continuous_cached_steps": 3,
}
m = None
try:
m = Omni(
model=model_name,
cache_backend="cache_dit",
cache_config=cache_config,
)

with OmniRunner(
model_name,
cache_backend="cache_dit",
cache_config=cache_config,
) as runner:
# Use minimal settings for fast testing
height = 256
width = 256
num_inference_steps = 4 # Minimal steps for fast test

outputs = m.generate(
outputs = runner.omni.generate(
"a photo of a cat sitting on a laptop keyboard",
OmniDiffusionSamplingParams(
height=height,
Expand Down Expand Up @@ -90,9 +75,3 @@ def test_cache_dit(model_name: str):
# Check image size
assert images[0].width == width
assert images[0].height == height
except Exception as e:
print(f"Test failed with error: {e}")
raise
finally:
if m is not None and hasattr(m, "close"):
m.close()
43 changes: 17 additions & 26 deletions tests/e2e/offline_inference/test_diffusion_cpu_offload.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,14 @@
import gc
import sys
from pathlib import Path

import pytest
import torch
from vllm.distributed.parallel_state import cleanup_dist_env_and_memory

from tests.conftest import OmniRunner
from tests.utils import DeviceMemoryMonitor, hardware_test
from vllm_omni.inputs.data import OmniDiffusionSamplingParams
from vllm_omni.platforms import current_omni_platform

# ruff: noqa: E402
REPO_ROOT = Path(__file__).resolve().parents[2]
if str(REPO_ROOT) not in sys.path:
sys.path.insert(0, str(REPO_ROOT))

from vllm_omni import Omni

models = ["riverclouds/qwen_image_random"]


Expand All @@ -27,30 +19,29 @@ def inference(model_name: str, offload: bool = True):
current_omni_platform.reset_peak_memory_stats()
monitor = DeviceMemoryMonitor(device_index=device_index, interval=0.02)
monitor.start()
m = Omni(
model=model_name,
with OmniRunner(
model_name,
# TODO: we might want to add overlapped feature e2e tests
# cache_backend="cache_dit",
enable_cpu_offload=offload,
)
current_omni_platform.reset_peak_memory_stats()
height = 256
width = 256
) as runner:
current_omni_platform.reset_peak_memory_stats()
height = 256
width = 256

m.generate(
"a photo of a cat sitting on a laptop keyboard",
OmniDiffusionSamplingParams(
height=height,
width=width,
num_inference_steps=9,
guidance_scale=0.0,
generator=torch.Generator(device=current_omni_platform.device_type).manual_seed(42),
),
)
runner.omni.generate(
"a photo of a cat sitting on a laptop keyboard",
OmniDiffusionSamplingParams(
height=height,
width=width,
num_inference_steps=9,
guidance_scale=0.0,
generator=torch.Generator(device=current_omni_platform.device_type).manual_seed(42),
),
)
peak = monitor.peak_used_mb
monitor.stop()

del m
gc.collect()
current_omni_platform.empty_cache()

Expand Down
Loading
Loading