Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .buildkite/scripts/hardware_ci/run-tpu-v1-test-part2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ run_and_track_test() {
# --- Actual Test Execution ---
run_and_track_test 1 "test_struct_output_generate.py" \
"python3 -m pytest -s -v /workspace/vllm/tests/v1/entrypoints/llm/test_struct_output_generate.py -k \"not test_structured_output_with_reasoning_matrices\""
"python3 -m pytest -s -v /workspace/vllm/tests/entrypoints/llm/test_struct_output_generate.py -k \"not test_structured_output_with_reasoning_matrices\""
run_and_track_test 2 "test_moe_pallas.py" \
"python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_moe_pallas.py"
run_and_track_test 3 "test_lora.py" \
Expand Down
34 changes: 4 additions & 30 deletions .buildkite/test-amd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -483,19 +483,6 @@ steps:
- pytest -v -s v1/e2e/spec_decode/test_spec_decode.py -k "tensor_parallelism"


- label: Entrypoints V1 # TBD
timeout_in_minutes: 180
mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
agent_pool: mi250_1
optional: true
working_dir: "/vllm-workspace/tests"
source_file_dependencies:
- vllm/
- tests/v1
commands:
- pytest -v -s v1/entrypoints


- label: V1 Sample + Logits # TBD
timeout_in_minutes: 60
mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
Expand Down Expand Up @@ -1173,14 +1160,14 @@ steps:
- vllm/v1/engine/
- vllm/v1/worker/
- tests/v1/distributed
- tests/v1/entrypoints/openai/test_multi_api_servers.py
- tests/entrypoints/openai/test_multi_api_servers.py
- vllm/platforms/rocm.py
commands:
- export TORCH_NCCL_BLOCKING_WAIT=1
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py
- DP_SIZE=2 pytest -v -s v1/entrypoints/openai/test_multi_api_servers.py
- DP_SIZE=2 pytest -v -s entrypoints/openai/test_multi_api_servers.py


- label: Distributed Compile + RPC Tests (2 GPUs) # TBD
Expand Down Expand Up @@ -1770,19 +1757,6 @@ steps:
- pytest -v -s v1/e2e/spec_decode/test_spec_decode.py -k "eagle_correctness_heavy"


- label: Entrypoints V1 # 25.7m
timeout_in_minutes: 45
mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
agent_pool: mi325_1
optional: true
working_dir: "/vllm-workspace/tests"
source_file_dependencies:
- vllm/
- tests/v1
commands:
- pytest -v -s v1/entrypoints


- label: V1 Spec Decode # TBD
timeout_in_minutes: 40
mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
Expand Down Expand Up @@ -2395,14 +2369,14 @@ steps:
- vllm/v1/engine/
- vllm/v1/worker/
- tests/v1/distributed
- tests/v1/entrypoints/openai/test_multi_api_servers.py
- tests/entrypoints/openai/test_multi_api_servers.py
- vllm/platforms/rocm.py
commands:
- export TORCH_NCCL_BLOCKING_WAIT=1
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py
- DP_SIZE=2 pytest -v -s v1/entrypoints/openai/test_multi_api_servers.py
- DP_SIZE=2 pytest -v -s entrypoints/openai/test_multi_api_servers.py


- label: Distributed Compile + RPC Tests (2 GPUs) # 56.1m
Expand Down
4 changes: 2 additions & 2 deletions .buildkite/test_areas/distributed.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@ steps:
- vllm/v1/engine/
- vllm/v1/worker/
- tests/v1/distributed
- tests/v1/entrypoints/openai/test_multi_api_servers.py
- tests/entrypoints/openai/test_multi_api_servers.py
commands:
# https://github.com/NVIDIA/nccl/issues/1838
- export NCCL_CUMEM_HOST_ENABLE=0
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py
- DP_SIZE=2 pytest -v -s v1/entrypoints/openai/test_multi_api_servers.py
- DP_SIZE=2 pytest -v -s entrypoints/openai/test_multi_api_servers.py

- label: Distributed Compile + RPC Tests (2 GPUs)
timeout_in_minutes: 20
Expand Down
15 changes: 1 addition & 14 deletions .buildkite/test_areas/entrypoints.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ steps:
- tests/entrypoints/test_chat_utils
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/chat_completion/test_oot_registration.py --ignore=entrypoints/openai/completion/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/chat_completion/test_oot_registration.py --ignore=entrypoints/openai/completion/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses --ignore=entrypoints/openai/test_multi_api_servers.py
- pytest -v -s entrypoints/test_chat_utils.py
mirror:
amd:
Expand Down Expand Up @@ -75,19 +75,6 @@ steps:
commands:
- pytest -v -s entrypoints/openai/responses

- label: Entrypoints V1
timeout_in_minutes: 50
source_file_dependencies:
- vllm/
- tests/v1
commands:
- pytest -v -s v1/entrypoints
mirror:
amd:
device: mi325_1
depends_on:
- image-build-amd

- label: OpenAI API Correctness
timeout_in_minutes: 30
source_file_dependencies:
Expand Down
4 changes: 2 additions & 2 deletions .buildkite/test_areas/model_runner_v2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ steps:
- vllm/v1/attention/
- tests/v1/engine/test_llm_engine.py
- tests/v1/e2e/
- tests/v1/entrypoints/llm/test_struct_output_generate.py
- tests/entrypoints/llm/test_struct_output_generate.py
commands:
- set -x
- export VLLM_USE_V2_MODEL_RUNNER=1
Expand All @@ -22,7 +22,7 @@ steps:
- pytest -v -s v1/e2e/general/test_context_length.py
- pytest -v -s v1/e2e/general/test_min_tokens.py
# Temporary hack filter to exclude ngram spec decoding based tests.
- pytest -v -s v1/entrypoints/llm/test_struct_output_generate.py -k "xgrammar and not speculative_config6 and not speculative_config7 and not speculative_config8 and not speculative_config0"
- pytest -v -s entrypoints/llm/test_struct_output_generate.py -k "xgrammar and not speculative_config6 and not speculative_config7 and not speculative_config8 and not speculative_config0"

- label: Model Runner V2 Examples
timeout_in_minutes: 45
Expand Down
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ CMakeLists.txt @tlrmchlsmth @LucasWilkinson
/tests/multimodal @DarkLight1337 @ywang96 @NickLucche
/tests/quantization @mgoin @robertgshaw2-redhat @yewentao256 @pavanimajety
/tests/test_inputs.py @DarkLight1337 @ywang96
/tests/v1/entrypoints/llm/test_struct_output_generate.py @mgoin @russellb @aarnphm
/tests/entrypoints/llm/test_struct_output_generate.py @mgoin @russellb @aarnphm
/tests/v1/structured_output @mgoin @russellb @aarnphm
/tests/v1/core @WoosukKwon @robertgshaw2-redhat @njhill @ywang96 @alexm-redhat @heheda12345 @ApostaC @orozery
/tests/weight_loading @mgoin @youkaichao @yewentao256
Expand Down
2 changes: 1 addition & 1 deletion .github/mergify.yml
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ pull_request_rules:
- files=examples/offline_inference/structured_outputs.py
- files=examples/online_serving/structured_outputs/structured_outputs.py
- files~=^tests/v1/structured_output/
- files=tests/v1/entrypoints/llm/test_struct_output_generate.py
- files=tests/entrypoints/llm/test_struct_output_generate.py
- files~=^vllm/v1/structured_output/
actions:
label:
Expand Down
52 changes: 49 additions & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@

from tblib import pickling_support

# Import fixture
from tests.v1.entrypoints.conftest import sample_json_schema # noqa

# ruff: noqa

# Install support for pickling exceptions so that we can nicely propagate
Expand Down Expand Up @@ -81,6 +78,55 @@

logger = init_logger(__name__)


@pytest.fixture
def sample_json_schema():
    """Return a JSON schema for a person record with a bounded work history.

    The dictionary is assembled from scratch on every invocation, so the
    caller receives its own independent copy.
    """
    # Schema for a single entry of the "work_history" array.
    work_entry = {
        "type": "object",
        "properties": {
            "company": {"type": "string"},
            "duration": {
                "type": "number",
                "minimum": 0.0,
                "maximum": 100.0,
            },
            "position": {"type": "string"},
        },
        "required": ["company", "duration", "position"],
        "additionalProperties": False,
    }

    # Top-level properties, listed in the same order as the "required" list.
    properties = {
        "name": {"type": "string"},
        "age": {"type": "integer"},
        "skills": {"type": "array", "items": {"type": "string"}},
        "grade": {"type": "string", "pattern": "^[A-D]$"},
        "email": {
            "type": "string",
            "pattern": "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$",
        },
        "work_history": {
            "type": "array",
            "items": work_entry,
            "minItems": 0,
            "maxItems": 3,
        },
    }

    return {
        "type": "object",
        "properties": properties,
        "required": ["name", "age", "skills", "grade", "email", "work_history"],
        "additionalProperties": False,
        "minProperties": 1,
        "maxProperties": 10,
    }


_TEST_DIR = os.path.dirname(__file__)
_TEST_PROMPTS = [os.path.join(_TEST_DIR, "prompts", "example.txt")]
_LONG_PROMPTS = [os.path.join(_TEST_DIR, "prompts", "summary.txt")]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,108 @@
StructuredOutputsParams,
)

# Dotted-quad IPv4 pattern: three "<octet>." groups followed by a final
# octet, where each octet is a decimal number in the range 0-255.
SAMPLE_REGEX = r"({octet}\.){{3}}{octet}".format(
    octet=r"(25[0-5]|(2[0-4]|1\d|[1-9]|)\d)",
)

# Note: Ensure this only uses attributes compatible with xgrammar
SAMPLE_JSON_SCHEMA = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "integer"},
        "skills": {"type": "array", "items": {"type": "string"}},
        # String constrained by a regex: a single letter A through D.
        "grade": {"type": "string", "pattern": "^[A-D]$"},
        "email": {
            "type": "string",
            "pattern": "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$",
        },
        # Array of at most three closed objects.
        "work_history": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "company": {"type": "string"},
                    # Number constrained to the range [0.0, 100.0].
                    "duration": {
                        "type": "number",
                        "minimum": 0.0,
                        "maximum": 100.0,
                    },
                    "position": {"type": "string"},
                },
                "required": ["company", "duration", "position"],
                "additionalProperties": False,
            },
            "minItems": 0,
            "maxItems": 3,
        },
    },
    "required": ["name", "age", "skills", "grade", "email", "work_history"],
    "additionalProperties": False,
    "minProperties": 1,
    "maxProperties": 10,
}

# A schema unsupported by xgrammar
UNSUPPORTED_JSON_SCHEMA = {
    "type": "object",
    "properties": {
        # Integer restricted to multiples of 5.
        "score": {"type": "integer", "multipleOf": 5},
        # Array of strings whose length is bounded on both sides.
        "tags": {
            "type": "array",
            "items": {
                "type": "string",
                "minLength": 10,
                "maxLength": 20,
            },
        },
    },
    "required": ["score", "tags"],
    "additionalProperties": False,
    "patternProperties": {"^score$": {"type": "integer"}},
}

# Ten programming-language names.
# NOTE(review): presumably the allowed options for choice-constrained
# structured output -- confirm against the tests that consume this list.
SAMPLE_STRUCTURED_OUTPUTS_CHOICES = [
    "Python",
    "Java",
    "JavaScript",
    "C++",
    "C#",
    "PHP",
    "TypeScript",
    "Ruby",
    "Swift",
    "Kotlin",
]

# EBNF grammar (entry rule `root`) for a minimal SELECT statement: one of two
# columns, one of two tables, and a `column = number` condition.
# The "from" and "where" terminals are lowercase while "SELECT" is uppercase.
SAMPLE_SQL_EBNF = """
root ::= select_statement
select_statement ::= "SELECT" column "from" table "where" condition
column ::= "col_1" | "col_2"
table ::= "table_1" | "table_2"
condition ::= column "=" number
number ::= "1" | "2"
"""

# Grammar with entry rule `start` and `name: ...` rule definitions, describing
# the same minimal SELECT statement as SAMPLE_SQL_EBNF.
# NOTE(review): the rule syntax looks like Lark, as the name suggests -- confirm.
SAMPLE_SQL_LARK = """
start: select_statement
select_statement: "SELECT" column "from" table "where" condition
column: "col_1" | "col_2"
table: "table_1" | "table_2"
condition: column "=" number
number: "1" | "2"
"""

NGRAM_SPEC_CONFIG = {
"model": "[ngram]",
"num_speculative_tokens": 5,
Expand Down Expand Up @@ -110,17 +212,17 @@ class CarDescription(BaseModel):
PARAMS_MODELS_BACKENDS_TOKENIZER_MODE,
)
def test_structured_output(
sample_json_schema: dict[str, Any],
unsupported_json_schema: dict[str, Any],
sample_sql_ebnf: str,
sample_sql_lark: str,
sample_regex: str,
sample_structured_outputs_choices: str,
backend: str,
tokenizer_mode: str,
model_name: str,
speculative_config: dict[str, Any],
):
sample_json_schema = SAMPLE_JSON_SCHEMA
unsupported_json_schema = UNSUPPORTED_JSON_SCHEMA
sample_sql_ebnf = SAMPLE_SQL_EBNF
sample_sql_lark = SAMPLE_SQL_LARK
sample_regex = SAMPLE_REGEX
sample_structured_outputs_choices = SAMPLE_STRUCTURED_OUTPUTS_CHOICES
if current_platform.is_tpu() and speculative_config:
pytest.skip("TPU does not support speculative decoding")

Expand Down Expand Up @@ -702,10 +804,10 @@ def test_structured_output_with_reasoning_matrices(

@pytest.mark.parametrize("model_name, tokenizer_mode", PARAMS_MODELS_TOKENIZER_MODE)
def test_structured_output_auto_mode(
unsupported_json_schema: dict[str, Any],
model_name: str,
tokenizer_mode: str,
):
unsupported_json_schema = UNSUPPORTED_JSON_SCHEMA
llm = LLM(
model=model_name,
max_model_len=1024,
Expand Down Expand Up @@ -808,9 +910,9 @@ def generate_with_backend(backend):

@pytest.mark.parametrize("backend", ["guidance", "xgrammar", "outlines"])
def test_structured_output_batched_with_non_structured_outputs_requests(
sample_json_schema: dict[str, Any],
backend: str,
):
sample_json_schema = SAMPLE_JSON_SCHEMA
# Don't use eager execution on TPUs because we want to test for no
# recompilation at runtime
enforce_eager = bool(not current_platform.is_tpu())
Expand Down
Empty file removed tests/v1/entrypoints/__init__.py
Empty file.
Loading
Loading