diff --git a/tests/entrypoints/openai/test_realtime_validation.py b/tests/entrypoints/openai/test_realtime_validation.py index 8f12a3764c17..273a034e1773 100644 --- a/tests/entrypoints/openai/test_realtime_validation.py +++ b/tests/entrypoints/openai/test_realtime_validation.py @@ -4,6 +4,7 @@ import asyncio import base64 import json +import warnings import librosa import numpy as np @@ -85,7 +86,41 @@ async def test_multi_chunk_streaming( await send_event(ws, {"type": "session.update", "model": model_name}) - # Send commit to start transcription + # Wait for the server to acknowledge the session update. + try: + while True: + event = await receive_event(ws, timeout=5.0) + if event["type"] == "session.updated": + break + except TimeoutError: + warnings.warn( + f"session.updated not received within {5.0}s after " + "session.update. The server may not implement this event.", + stacklevel=2, + ) + + # (ROCm) Warm-up: send a non-final commit (required to start + # transcription) with a small audio chunk to trigger aiter + # compilation on first use. + await send_event(ws, {"type": "input_audio_buffer.commit"}) + await send_event( + ws, + { + "type": "input_audio_buffer.append", + "audio": mary_had_lamb_audio_chunks[0], + }, + ) + await send_event(ws, {"type": "input_audio_buffer.commit", "final": True}) + + # (ROCm) Drain all warm-up responses with generous timeout for + # JIT compilation + warmup_done = False + while not warmup_done: + event = await receive_event(ws, timeout=360.0) + if event["type"] in ("transcription.done", "error"): + warmup_done = True + + # Now send the real test audio await send_event(ws, {"type": "input_audio_buffer.commit"}) # Send multiple audio chunks @@ -153,6 +188,18 @@ async def test_empty_commit_does_not_crash_engine( await send_event(ws, {"type": "session.update", "model": model_name}) + try: + while True: + event = await receive_event(ws, timeout=5.0) + if event["type"] == "session.updated": + break + except TimeoutError: + warnings.warn( + f"session.updated not received within {5.0}s after " + "session.update. The server may not implement this event.", + stacklevel=2, + ) + # Start generation without sending any audio await send_event(ws, {"type": "input_audio_buffer.commit"}) @@ -161,7 +208,8 @@ async def test_empty_commit_does_not_crash_engine( # We should get *some* response (error or empty transcription), # but the engine must NOT crash. - event = await receive_event(ws, timeout=30.0) + # (ROCm) Use generous timeout for first request (aiter JIT compilation) + event = await receive_event(ws, timeout=360.0) assert event["type"] in ( "error", "transcription.done", @@ -176,6 +224,19 @@ async def test_empty_commit_does_not_crash_engine( await send_event(ws, {"type": "session.update", "model": model_name}) + try: + while True: + event = await receive_event(ws, timeout=5.0) + if event["type"] == "session.updated": + break + except TimeoutError: + warnings.warn( + f"session.updated not received within {5.0}s after " + "session.update. The server may not implement this event.", + stacklevel=2, + ) + + # Start transcription await send_event(ws, {"type": "input_audio_buffer.commit"}) for chunk in mary_had_lamb_audio_chunks: