vllm-project · WoosukKwon · Sep 18, 2025 · Sep 17, 2025 · Sep 17, 2025 · Sep 17, 2025
@@ -28,11 +28,9 @@ def monkeypatch_module():
     mpatch.undo()
 
 
-@pytest.fixture(scope="module", params=[False, True])
-def server(request, monkeypatch_module, zephyr_lora_files):  #noqa: F811
-
-    use_v1 = request.param
-    monkeypatch_module.setenv('VLLM_USE_V1', '1' if use_v1 else '0')
+@pytest.fixture(scope="module")
+def server(monkeypatch_module, zephyr_lora_files):  #noqa: F811
+    monkeypatch_module.setenv('VLLM_USE_V1', '1')
 
     args = [
         # use half precision for speed and memory savings in CI environment
@@ -57,13 +55,6 @@ def server(request, monkeypatch_module, zephyr_lora_files):  #noqa: F811
         yield remote_server
 
 
-@pytest.fixture
-def is_v1_server(server):
-    import os
-    assert os.environ['VLLM_USE_V1'] in ['0', '1']
-    return os.environ['VLLM_USE_V1'] == '1'
-
-
 @pytest_asyncio.fixture
 async def client(server):
     async with server.get_async_client() as async_client:
@@ -481,10 +472,9 @@ async def test_chat_completion_stream_options(client: openai.AsyncOpenAI,
 
 @pytest.mark.asyncio
 async def test_structured_outputs_choice_chat(
-        client: openai.AsyncOpenAI, sample_structured_outputs_choices,
-        is_v1_server: bool):
-    if not is_v1_server:
-        pytest.skip("Structured outputs is only supported in v1 engine")
+    client: openai.AsyncOpenAI,
+    sample_structured_outputs_choices,
+):
     messages = [{
         "role": "system",
         "content": "you are a helpful assistant"
@@ -522,12 +512,10 @@ async def test_structured_outputs_choice_chat(
 
 
 @pytest.mark.asyncio
-async def test_structured_outputs_json_chat(client: openai.AsyncOpenAI,
-                                            sample_json_schema,
-                                            is_v1_server: bool):
-    if not is_v1_server:
-        pytest.skip("Structured outputs is only supported in v1 engine")
-
+async def test_structured_outputs_json_chat(
+    client: openai.AsyncOpenAI,
+    sample_json_schema,
+):
     messages = [{
         "role": "system",
         "content": "you are a helpful assistant"
@@ -569,10 +557,10 @@ async def test_structured_outputs_json_chat(client: openai.AsyncOpenAI,
 
 
 @pytest.mark.asyncio
-async def test_structured_outputs_regex_chat(client: openai.AsyncOpenAI,
-                                             sample_regex, is_v1_server: bool):
-    if not is_v1_server:
-        pytest.skip("Structured outputs is only supported in v1 engine")
+async def test_structured_outputs_regex_chat(
+    client: openai.AsyncOpenAI,
+    sample_regex,
+):
 
     messages = [{
         "role": "system",
@@ -660,10 +648,10 @@ async def test_structured_outputs_choice_chat_logprobs(
 
 
 @pytest.mark.asyncio
-async def test_named_tool_use(client: openai.AsyncOpenAI, sample_json_schema,
-                              is_v1_server: bool):
-    if not is_v1_server:
-        pytest.skip("Tool use is only supported in v1 engine")
+async def test_named_tool_use(
+    client: openai.AsyncOpenAI,
+    sample_json_schema,
+):
     messages = [{
         "role": "system",
         "content": "you are a helpful assistant"
@@ -821,11 +809,7 @@ async def test_response_format_json_object(client: openai.AsyncOpenAI):
 
 
 @pytest.mark.asyncio
-async def test_response_format_json_schema(client: openai.AsyncOpenAI,
-                                           is_v1_server: bool):
-    if not is_v1_server:
-        pytest.skip(
-            "JSON schema response format is only supported in v1 engine")
+async def test_response_format_json_schema(client: openai.AsyncOpenAI):
     prompt = 'what is 1+1? The format is "result": 2'
     # Check that this prompt cannot lead to a valid JSON without json_schema
     for _ in range(2):