# Patch summary: tests/entrypoints/openai/correctness/test_transcription_api_correctness.py
#   * Lowers MAX_SEQS_FOR_TRANSCRIPTION_TEST from 32 to 8.
#   * Adds GPU_UTIL_FOR_TRANSCRIPTION_TEST = 0.5 and appends it to the
#     server_args list shown in the test_wer_correctness hunk as
#     --gpu_memory_utilization.
#   The context comment kept by the patch says these values are tuned to
#   prevent OOM on 18GB GPUs in the transcription correctness tests.
# NOTE(review): this patch appears to have been flattened onto a single line;
#   the original line breaks and indentation are not visible here, so the
#   patch text below is left untouched - confirm against the upstream commit
#   before applying.
diff --git a/tests/entrypoints/openai/correctness/test_transcription_api_correctness.py b/tests/entrypoints/openai/correctness/test_transcription_api_correctness.py index a3df30fb02b2..fedbd74795b5 100644 --- a/tests/entrypoints/openai/correctness/test_transcription_api_correctness.py +++ b/tests/entrypoints/openai/correctness/test_transcription_api_correctness.py @@ -27,7 +27,8 @@ from ....utils import RemoteOpenAIServer # Tuned to prevent OOM on 18GB GPUs in transcription correctness tests. -MAX_SEQS_FOR_TRANSCRIPTION_TEST = 32 +MAX_SEQS_FOR_TRANSCRIPTION_TEST = 8 +GPU_UTIL_FOR_TRANSCRIPTION_TEST = 0.5 def to_bytes(y, sr): @@ -188,6 +189,7 @@ def test_wer_correctness( "--enforce-eager", f"--tokenizer_mode={model_info.tokenizer_mode}", f"--max_num_seqs={MAX_SEQS_FOR_TRANSCRIPTION_TEST}", + f"--gpu_memory_utilization={GPU_UTIL_FOR_TRANSCRIPTION_TEST}", ] if model_info.trust_remote_code: server_args.append("--trust-remote-code")