Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions tests/v1/entrypoints/llm/test_struct_output_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,6 @@
("Qwen/Qwen2.5-1.5B-Instruct", "auto"),
]

platform_args = {}
if current_platform.is_rocm():
platform_args["async_scheduling"] = False


class CarType(str, Enum):
sedan = "sedan"
Expand Down Expand Up @@ -138,7 +134,6 @@ def test_structured_output(
load_format="auto" if not model_name.startswith("mistralai/") else "hf",
config_format="auto" if not model_name.startswith("mistralai/") else "hf",
speculative_config=speculative_config,
**platform_args,
)

#
Expand Down
4 changes: 3 additions & 1 deletion vllm/v1/worker/gpu_input_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,9 @@ def update_req_spec_token_ids(
# _prepare_input_ids.
start_index = self.num_tokens_no_spec[req_index]
end_token_index = start_index + num_spec_tokens
self.token_ids_cpu[req_index, start_index:end_token_index] = spec_token_ids
# Replace -1 values with 0 to avoid embedding lookup errors
safe_spec_token_ids = [tok if tok != -1 else 0 for tok in spec_token_ids]
self.token_ids_cpu[req_index, start_index:end_token_index] = safe_spec_token_ids
Comment on lines +467 to +468
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

This is a great fix for the embedding lookup error. However, there's a related issue on the next line.

cur_spec_token_ids is being extended with the original spec_token_ids, which can contain -1 placeholders. This can cause a RuntimeError from torch.bincount when penalties (like frequency or presence penalty) are applied, as bincount does not support negative indices. This would happen when async_scheduling is False and penalties are enabled.

To prevent this potential crash, you should also use safe_spec_token_ids to extend cur_spec_token_ids.

Suggested change for line 469 (original line shown first, replacement second):

- cur_spec_token_ids.extend(spec_token_ids)
+ cur_spec_token_ids.extend(safe_spec_token_ids)

def remove_request(self, req_id: str) -> int | None:
Expand Down