From 5bbbd09dcf89b281e825b1bd9608eea7eb6d1a85 Mon Sep 17 00:00:00 2001 From: Micah Williamson Date: Wed, 14 Jan 2026 22:23:55 +0000 Subject: [PATCH 1/2] disable async scheduling on ROCm Signed-off-by: Micah Williamson --- tests/v1/entrypoints/llm/test_struct_output_generate.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py index d819a92e5d69..cdd40b9eac5b 100644 --- a/tests/v1/entrypoints/llm/test_struct_output_generate.py +++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py @@ -35,6 +35,7 @@ "method": "eagle", "model": "yuhuili/EAGLE-LLaMA3.1-Instruct-8B", "num_speculative_tokens": 5, + "async_scheduling": False, } PARAMS_MODELS_BACKENDS_TOKENIZER_MODE = [ @@ -87,6 +88,10 @@ ("Qwen/Qwen2.5-1.5B-Instruct", "auto"), ] +platform_args = {} +if current_platform.is_rocm(): + platform_args["async_scheduling"] = False + class CarType(str, Enum): sedan = "sedan" @@ -134,6 +139,7 @@ def test_structured_output( load_format="auto" if not model_name.startswith("mistralai/") else "hf", config_format="auto" if not model_name.startswith("mistralai/") else "hf", speculative_config=speculative_config, + **platform_args, ) # @@ -649,6 +655,7 @@ def test_structured_output_with_reasoning_matrices( ), tokenizer_mode=tokenizer_mode, speculative_config=speculative_config, + **({"async_scheduling": False} if current_platform.is_rocm() else {}), async_scheduling=async_scheduling, ) tokenizer = llm.get_tokenizer() From d1c1bf36c6058275bbad821e6a38cb0caf05974c Mon Sep 17 00:00:00 2001 From: Micah Williamson Date: Wed, 14 Jan 2026 22:29:40 +0000 Subject: [PATCH 2/2] remove erroneous params Signed-off-by: Micah Williamson --- tests/v1/entrypoints/llm/test_struct_output_generate.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py index cdd40b9eac5b..c6c9c0ce40a1 100644 --- a/tests/v1/entrypoints/llm/test_struct_output_generate.py +++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py @@ -35,7 +35,6 @@ "method": "eagle", "model": "yuhuili/EAGLE-LLaMA3.1-Instruct-8B", "num_speculative_tokens": 5, - "async_scheduling": False, } PARAMS_MODELS_BACKENDS_TOKENIZER_MODE = [ @@ -655,7 +654,6 @@ def test_structured_output_with_reasoning_matrices( ), tokenizer_mode=tokenizer_mode, speculative_config=speculative_config, - **({"async_scheduling": False} if current_platform.is_rocm() else {}), async_scheduling=async_scheduling, ) tokenizer = llm.get_tokenizer()