diff --git a/tests/entrypoints/openai/test_realtime_validation.py b/tests/entrypoints/openai/test_realtime_validation.py
index 8f12a3764c17..273a034e1773 100644
--- a/tests/entrypoints/openai/test_realtime_validation.py
+++ b/tests/entrypoints/openai/test_realtime_validation.py
@@ -4,6 +4,7 @@
 import asyncio
 import base64
 import json
+import warnings
 
 import librosa
 import numpy as np
@@ -85,7 +86,41 @@ async def test_multi_chunk_streaming(
 
             await send_event(ws, {"type": "session.update", "model": model_name})
 
-            # Send commit to start transcription
+            # Wait for the server to acknowledge the session update.
+            try:
+                while True:
+                    event = await receive_event(ws, timeout=5.0)
+                    if event["type"] == "session.updated":
+                        break
+            except TimeoutError:
+                warnings.warn(
+                    f"session.updated not received within {5.0}s after "
+                    "session.update. The server may not implement this event.",
+                    stacklevel=2,
+                )
+
+            # (ROCm) Warm-up: send a non-final commit (required to start
+            # transcription), one small audio chunk, and then a final commit,
+            # to trigger aiter compilation before the real test audio is sent.
+            await send_event(ws, {"type": "input_audio_buffer.commit"})
+            await send_event(
+                ws,
+                {
+                    "type": "input_audio_buffer.append",
+                    "audio": mary_had_lamb_audio_chunks[0],
+                },
+            )
+            await send_event(ws, {"type": "input_audio_buffer.commit", "final": True})
+
+            # (ROCm) Drain warm-up responses until transcription.done or error,
+            # using a generous per-event timeout to allow for JIT compilation
+            warmup_done = False
+            while not warmup_done:
+                event = await receive_event(ws, timeout=360.0)
+                if event["type"] in ("transcription.done", "error"):
+                    warmup_done = True
+
+            # Now send the real test audio
             await send_event(ws, {"type": "input_audio_buffer.commit"})
 
             # Send multiple audio chunks
@@ -153,6 +188,18 @@ async def test_empty_commit_does_not_crash_engine(
 
             await send_event(ws, {"type": "session.update", "model": model_name})
 
+            try:
+                while True:
+                    event = await receive_event(ws, timeout=5.0)
+                    if event["type"] == "session.updated":
+                        break
+            except TimeoutError:
+                warnings.warn(
+                    f"session.updated not received within {5.0}s after "
+                    "session.update. The server may not implement this event.",
+                    stacklevel=2,
+                )
+
             # Start generation without sending any audio
             await send_event(ws, {"type": "input_audio_buffer.commit"})
 
@@ -161,7 +208,8 @@ async def test_empty_commit_does_not_crash_engine(
 
             # We should get *some* response (error or empty transcription),
             # but the engine must NOT crash.
-            event = await receive_event(ws, timeout=30.0)
+            # (ROCm) Use generous timeout for first request (aiter JIT compilation)
+            event = await receive_event(ws, timeout=360.0)
             assert event["type"] in (
                 "error",
                 "transcription.done",
@@ -176,6 +224,19 @@ async def test_empty_commit_does_not_crash_engine(
 
             await send_event(ws, {"type": "session.update", "model": model_name})
 
+            try:
+                while True:
+                    event = await receive_event(ws, timeout=5.0)
+                    if event["type"] == "session.updated":
+                        break
+            except TimeoutError:
+                warnings.warn(
+                    f"session.updated not received within {5.0}s after "
+                    "session.update. The server may not implement this event.",
+                    stacklevel=2,
+                )
+
+            # Start transcription
             await send_event(ws, {"type": "input_audio_buffer.commit"})
 
             for chunk in mary_had_lamb_audio_chunks: