From 271007152075e7ede95de3e0c896aed4726c02f0 Mon Sep 17 00:00:00 2001 From: Hanchenli Date: Fri, 12 Sep 2025 01:03:53 +0000 Subject: [PATCH 1/5] added newest structural tag support for xgrammar Signed-off-by: Hanchenli --- .gitignore | 1 + .../llm/test_struct_output_generate.py | 51 +++++++++++++++++++ vllm/entrypoints/openai/protocol.py | 20 ++++++-- vllm/v1/structured_output/backend_xgrammar.py | 42 +++++++++------ vllm/v1/worker/gpu_model_runner.py | 1 - 5 files changed, 93 insertions(+), 22 deletions(-) diff --git a/.gitignore b/.gitignore index b1df673e83ca..2dc2e0426103 100644 --- a/.gitignore +++ b/.gitignore @@ -218,3 +218,4 @@ csrc/moe/marlin_moe_wna16/kernel_* # Ignore ep_kernels_workspace folder ep_kernels_workspace/ +xgrammar/ \ No newline at end of file diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py index 126d8ce8c8e0..b551928ab412 100644 --- a/tests/v1/entrypoints/llm/test_struct_output_generate.py +++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py @@ -814,3 +814,54 @@ def test_structured_output_batched_with_non_guided_requests( # non-guided requests should not return a valid JSON here with pytest.raises(ValueError): output_json = json.loads(generated_text) + + +@pytest.mark.parametrize("guided_decoding_backend", ["xgrammar"]) +def test_structured_output_with_structural_tag( + monkeypatch: pytest.MonkeyPatch, + guided_decoding_backend: str, +): + monkeypatch.setenv("VLLM_USE_V1", "1") + + llm = LLM( + model="Qwen/Qwen2.5-1.5B-Instruct", + guided_decoding_backend=guided_decoding_backend, + ) + + structural_tag_config = { + "type": "structural_tag", + "format": { + "type": + "triggered_tags", + "tags": [{ + "begin": "hello_flag", + "content": { + "type": "any_text" + }, + "end": "hello" + }], + "triggers": ["hello"], + "stop_after_first": + False + } + } + + sampling_params = SamplingParams( + temperature=0.0, + max_tokens=500, + guided_decoding=GuidedDecodingParams( + structural_tag=json.dumps(structural_tag_config)), + ) + + prompt = "Hello and repete hello 10 times, do not say anything else. Only say hello hello hello, now start" + outputs = llm.generate(prompt, + sampling_params=sampling_params, + use_tqdm=True) + assert outputs is not None + for output in outputs: + assert output is not None + assert isinstance(output, RequestOutput) + prompt = output.prompt + generated_text = output.outputs[0].text + assert generated_text is not None + assert "hello_flag" in generated_text, f"Expected 'hello_flag' to be in generated text, but got: {generated_text}" diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index c8ecbd28e7db..a681f0632f73 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -154,7 +154,7 @@ class JsonSchemaResponseFormat(OpenAIBaseModel): strict: Optional[bool] = None -class StructuralTag(OpenAIBaseModel): +class LegacyStructuralTag(OpenAIBaseModel): begin: str # schema is the field, but that causes conflicts with pydantic so # instead use structural_tag_schema with an alias @@ -163,19 +163,29 @@ class StructuralTag(OpenAIBaseModel): end: str -class StructuralTagResponseFormat(OpenAIBaseModel): +class LegacyStructuralTagResponseFormat(OpenAIBaseModel): type: Literal["structural_tag"] - structures: list[StructuralTag] + structures: list[LegacyStructuralTag] triggers: list[str] +class StructuralTagResponseFormat(OpenAIBaseModel): + type: Literal["structural_tag"] + format: Any + + +AnyStructuralTagResponseFormat = Union[LegacyStructuralTagResponseFormat, + StructuralTagResponseFormat] + + class ResponseFormat(OpenAIBaseModel): # type must be "json_schema", "json_object", or "text" type: Literal["text", "json_object", "json_schema"] json_schema: Optional[JsonSchemaResponseFormat] = None -AnyResponseFormat = Union[ResponseFormat, StructuralTagResponseFormat] +AnyResponseFormat = Union[ResponseFormat, LegacyStructuralTagResponseFormat, + StructuralTagResponseFormat] class StreamOptions(OpenAIBaseModel): @@ -683,7 +693,7 @@ def to_sampling_params( elif self.response_format.type == "structural_tag": structural_tag = self.response_format assert structural_tag is not None and isinstance( - structural_tag, StructuralTagResponseFormat) + structural_tag, AnyStructuralTagResponseFormat) s_tag_obj = structural_tag.model_dump(by_alias=True) self.structural_tag = json.dumps(s_tag_obj) diff --git a/vllm/v1/structured_output/backend_xgrammar.py b/vllm/v1/structured_output/backend_xgrammar.py index 5e00f6380416..605cd22b8ff4 100644 --- a/vllm/v1/structured_output/backend_xgrammar.py +++ b/vllm/v1/structured_output/backend_xgrammar.py @@ -101,14 +101,19 @@ def compile_grammar(self, request_type: StructuredOutputOptions, ctx = self.compiler.compile_regex(grammar_spec) elif request_type == StructuredOutputOptions.STRUCTURAL_TAG: s_tag = json.loads(grammar_spec) - tags = [ - xgr.StructuralTagItem( - begin=s["begin"], - schema=json.dumps(s["schema"]), - end=s["end"], - ) for s in s_tag["structures"] - ] - ctx = self.compiler.compile_structural_tag(tags, s_tag["triggers"]) + if "structures" in s_tag: + #Falling back to deprecated method of compiling structural tag + tags = [ + xgr.StructuralTagItem( + begin=s["begin"], + schema=json.dumps(s["schema"]), + end=s["end"], + ) for s in s_tag["structures"] + ] + ctx = self.compiler.compile_structural_tag( + tags, s_tag["triggers"]) + else: + ctx = self.compiler.compile_structural_tag(grammar_spec) else: logger.error( "Validation should have already occurred. Please file an issue." @@ -311,13 +316,18 @@ def validate_xgrammar_grammar(sampling_params: SamplingParams) -> None: if gd_params.structural_tag: try: s_tag = json.loads(gd_params.structural_tag) - tags = [ - xgr.StructuralTagItem( - begin=s["begin"], - schema=json.dumps(s["schema"]), - end=s["end"], - ) for s in s_tag["structures"] - ] - xgr.Grammar.from_structural_tag(tags, s_tag["triggers"]) + + # Using the deprecated method of compiling structural tag + if "structures" in s_tag: + tags = [ + xgr.StructuralTagItem( + begin=s["begin"], + schema=json.dumps(s["schema"]), + end=s["end"], + ) for s in s_tag["structures"] + ] + xgr.Grammar.from_structural_tag(tags, s_tag["triggers"]) + else: + xgr.Grammar.from_structural_tag(gd_params.structural_tag) except Exception as e: raise ValueError("Invalid structural tag specification.") from e diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 33f4d65a7a11..0ee1def2d11d 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -92,7 +92,6 @@ if TYPE_CHECKING: import xgrammar as xgr - from vllm.model_executor.model_loader.tensorizer import TensorizerConfig from vllm.v1.core.sched.output import SchedulerOutput else: From 4cd746bb28c3ed1eefa171363816b4e8e3e1c6ab Mon Sep 17 00:00:00 2001 From: Hanchenli Date: Wed, 24 Sep 2025 18:01:19 +0000 Subject: [PATCH 2/5] cleanup Signed-off-by: Hanchenli --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 2dc2e0426103..b1df673e83ca 100644 --- a/.gitignore +++ b/.gitignore @@ -218,4 +218,3 @@ csrc/moe/marlin_moe_wna16/kernel_* # Ignore ep_kernels_workspace folder ep_kernels_workspace/ -xgrammar/ \ No newline at end of file From 47941cdf8b5242518772e18fdde979331904b913 Mon Sep 17 00:00:00 2001 From: Aaron Pham Date: Fri, 3 Oct 2025 23:18:13 -0400 Subject: [PATCH 3/5] Update tests/v1/entrypoints/llm/test_struct_output_generate.py Co-authored-by: Benjamin Chislett Signed-off-by: Aaron Pham --- tests/v1/entrypoints/llm/test_struct_output_generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py index c0dd6910c83d..9a17562156d0 100644 --- a/tests/v1/entrypoints/llm/test_struct_output_generate.py +++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py @@ -886,7 +886,7 @@ def test_structured_output_with_structural_tag( structural_tag=json.dumps(structural_tag_config)), ) - prompt = "Hello and repete hello 10 times, do not say anything else. Only say hello hello hello, now start" + prompt = "Hello and repeat hello 10 times, do not say anything else. Only say hello hello hello, now start" outputs = llm.generate(prompt, sampling_params=sampling_params, use_tqdm=True) From 151c35bac024bea2080ac089cb84eb919df909d6 Mon Sep 17 00:00:00 2001 From: Hanchenli Date: Mon, 6 Oct 2025 12:21:57 -0700 Subject: [PATCH 4/5] fixing precommit Signed-off-by: Hanchenli --- .../llm/test_struct_output_generate.py | 29 ++++++++----------- vllm/entrypoints/openai/protocol.py | 18 ++++++++---- vllm/v1/structured_output/backend_xgrammar.py | 11 +++---- 3 files changed, 30 insertions(+), 28 deletions(-) diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py index 8d19f706d823..0e2da7431b3e 100644 --- a/tests/v1/entrypoints/llm/test_struct_output_generate.py +++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py @@ -895,32 +895,25 @@ def test_structured_output_with_structural_tag( structural_tag_config = { "type": "structural_tag", "format": { - "type": - "triggered_tags", - "tags": [{ - "begin": "hello_flag", - "content": { - "type": "any_text" - }, - "end": "hello" - }], + "type": "triggered_tags", + "tags": [ + {"begin": "hello_flag", "content": {"type": "any_text"}, "end": "hello"} + ], "triggers": ["hello"], - "stop_after_first": - False - } + "stop_after_first": False, + }, } sampling_params = SamplingParams( temperature=0.0, max_tokens=500, guided_decoding=GuidedDecodingParams( - structural_tag=json.dumps(structural_tag_config)), + structural_tag=json.dumps(structural_tag_config) + ), ) prompt = "Hello and repeat hello 10 times, do not say anything else. Only say hello hello hello, now start" - outputs = llm.generate(prompt, - sampling_params=sampling_params, - use_tqdm=True) + outputs = llm.generate(prompt, sampling_params=sampling_params, use_tqdm=True) assert outputs is not None for output in outputs: assert output is not None @@ -928,4 +921,6 @@ def test_structured_output_with_structural_tag( prompt = output.prompt generated_text = output.outputs[0].text assert generated_text is not None - assert "hello_flag" in generated_text, f"Expected 'hello_flag' to be in generated text, but got: {generated_text}" + assert "hello_flag" in generated_text, ( + f"Expected 'hello_flag' to be in generated text, but got: {generated_text}" + ) diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 6958e7e19a8a..f0b86222365c 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -207,8 +207,9 @@ class StructuralTagResponseFormat(OpenAIBaseModel): format: Any -AnyStructuralTagResponseFormat = Union[LegacyStructuralTagResponseFormat, - StructuralTagResponseFormat] +AnyStructuralTagResponseFormat = Union[ + LegacyStructuralTagResponseFormat, StructuralTagResponseFormat +] class ResponseFormat(OpenAIBaseModel): @@ -217,8 +218,9 @@ class ResponseFormat(OpenAIBaseModel): json_schema: Optional[JsonSchemaResponseFormat] = None -AnyResponseFormat = Union[ResponseFormat, LegacyStructuralTagResponseFormat, - StructuralTagResponseFormat] +AnyResponseFormat = Union[ + ResponseFormat, LegacyStructuralTagResponseFormat, StructuralTagResponseFormat +] class StreamOptions(OpenAIBaseModel): @@ -821,8 +823,12 @@ def to_sampling_params( elif response_format.type == "structural_tag": structural_tag = response_format assert structural_tag is not None and isinstance( - structural_tag, (LegacyStructuralTagResponseFormat, - StructuralTagResponseFormat)) + structural_tag, + ( + LegacyStructuralTagResponseFormat, + StructuralTagResponseFormat, + ), + ) s_tag_obj = structural_tag.model_dump(by_alias=True) self.structured_outputs.structural_tag = json.dumps(s_tag_obj) diff --git a/vllm/v1/structured_output/backend_xgrammar.py b/vllm/v1/structured_output/backend_xgrammar.py index 519ae264984f..f48af4ceb8ef 100644 --- a/vllm/v1/structured_output/backend_xgrammar.py +++ b/vllm/v1/structured_output/backend_xgrammar.py @@ -127,16 +127,16 @@ def compile_grammar( ) ctx = self.compiler.compile_structural_tag(structural_tag) if "structures" in s_tag: - #Falling back to deprecated method of compiling structural tag + # Falling back to deprecated method of compiling structural tag tags = [ xgr.StructuralTagItem( begin=s["begin"], schema=json.dumps(s["schema"]), end=s["end"], - ) for s in s_tag["structures"] + ) + for s in s_tag["structures"] ] - ctx = self.compiler.compile_structural_tag( - tags, s_tag["triggers"]) + ctx = self.compiler.compile_structural_tag(tags, s_tag["triggers"]) else: ctx = self.compiler.compile_structural_tag(grammar_spec) else: @@ -364,7 +364,8 @@ def validate_xgrammar_grammar(sampling_params: SamplingParams) -> None: begin=s["begin"], schema=json.dumps(s["schema"]), end=s["end"], - ) for s in s_tag["structures"] + ) + for s in s_tag["structures"] ] xgr.Grammar.from_structural_tag(tags, s_tag["triggers"]) else: From db32e8bb771b1dd551bd1a8a948a594cb321b20c Mon Sep 17 00:00:00 2001 From: Hanchenli Date: Tue, 7 Oct 2025 00:53:50 -0700 Subject: [PATCH 5/5] fixed bug Signed-off-by: Hanchenli --- vllm/v1/structured_output/backend_xgrammar.py | 22 +++++++------------ 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/vllm/v1/structured_output/backend_xgrammar.py b/vllm/v1/structured_output/backend_xgrammar.py index f48af4ceb8ef..d93be4f9adec 100644 --- a/vllm/v1/structured_output/backend_xgrammar.py +++ b/vllm/v1/structured_output/backend_xgrammar.py @@ -114,18 +114,6 @@ def compile_grammar( ctx = self.compiler.compile_regex(grammar_spec) elif request_type == StructuredOutputOptions.STRUCTURAL_TAG: s_tag = json.loads(grammar_spec) - tags = [ - xgr.StructuralTagItem( - begin=s["begin"], - schema=json.dumps(s["schema"]), - end=s["end"], - ) - for s in s_tag["structures"] - ] - structural_tag = xgr.StructuralTag.from_legacy_structural_tag( - tags, s_tag["triggers"] - ) - ctx = self.compiler.compile_structural_tag(structural_tag) if "structures" in s_tag: # Falling back to deprecated method of compiling structural tag tags = [ @@ -136,7 +124,10 @@ def compile_grammar( ) for s in s_tag["structures"] ] - ctx = self.compiler.compile_structural_tag(tags, s_tag["triggers"]) + structural_tag = xgr.StructuralTag.from_legacy_structural_tag( + tags, s_tag["triggers"] + ) + ctx = self.compiler.compile_structural_tag(structural_tag) else: ctx = self.compiler.compile_structural_tag(grammar_spec) else: @@ -367,7 +358,10 @@ def validate_xgrammar_grammar(sampling_params: SamplingParams) -> None: ) for s in s_tag["structures"] ] - xgr.Grammar.from_structural_tag(tags, s_tag["triggers"]) + structural_tag = xgr.StructuralTag.from_legacy_structural_tag( + tags, s_tag["triggers"] + ) + xgr.Grammar.from_structural_tag(structural_tag) else: xgr.Grammar.from_structural_tag(so_params.structural_tag) except Exception as e: