diff --git a/.buildkite/test-merge.yml b/.buildkite/test-merge.yml index fc1f7a67969..7bee193191e 100644 --- a/.buildkite/test-merge.yml +++ b/.buildkite/test-merge.yml @@ -259,7 +259,6 @@ steps: depends_on: upload-merge-pipeline commands: - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - export VLLM_TEST_CLEAN_GPU_MEMORY="1" - pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py - pytest -s -v tests/e2e/online_serving/test_qwen3_omni.py -m "advanced_model" --run-level "advanced_model" - pytest -s -v tests/e2e/online_serving/test_mimo_audio.py -m "advanced_model" --run-level "advanced_model" diff --git a/tests/conftest.py b/tests/conftest.py index 4539fec2722..bdaa71e57de 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -48,6 +48,7 @@ from vllm_omni.entrypoints.omni import Omni from vllm_omni.inputs.data import OmniSamplingParams from vllm_omni.outputs import OmniRequestOutput +from vllm_omni.platforms import current_omni_platform logger = init_logger(__name__) @@ -962,7 +963,7 @@ def convert_audio_to_text(audio_data): Convert base64 encoded audio data to text using speech recognition. """ audio_data = base64.b64decode(audio_data) - output_path = f"./test_{int(time.time())}.wav" + output_path = f"./test_{uuid.uuid4().hex}.wav" with open(output_path, "wb") as audio_file: audio_file.write(audio_data) @@ -986,8 +987,24 @@ def _merge_base64_audio_to_segment(base64_list: list[str]): def _whisper_transcribe_in_current_process(output_path: str) -> str: import whisper - # Keep Whisper on CPU to avoid consuming GPU memory in tests. - model = whisper.load_model("small", device="cpu") + # Multi-GPU: use last visible device to avoid colliding with default device 0; single device uses 0. + device_index = None + if current_omni_platform.is_available(): + n = current_omni_platform.get_device_count() + if n == 1: + device_index = 0 + elif n > 1: + device_index = n - 1 + + if device_index is not None: + torch_device = current_omni_platform.get_torch_device(device_index) + current_omni_platform.set_device(torch_device) + device = str(torch_device) + use_accelerator = True + else: + use_accelerator = False + device = "cpu" + model = whisper.load_model("small", device=device) try: text = model.transcribe( output_path, @@ -998,6 +1015,9 @@ def _whisper_transcribe_in_current_process(output_path: str) -> str: finally: del model gc.collect() + if use_accelerator: + current_omni_platform.synchronize() + current_omni_platform.empty_cache() return text or ""