Skip to content
Closed
46 changes: 46 additions & 0 deletions tests/v1/entrypoints/llm/test_struct_output_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -864,3 +864,49 @@ def test_structured_output_batched_with_non_structured_outputs_requests(
# Non-structured-output requests should not return valid JSON here.
with pytest.raises(ValueError):
output_json = json.loads(generated_text)


# Checks that generation constrained by a structural tag in the
# "triggered_tags" format produces text containing the tag's begin marker.
# Parametrized over the guided-decoding backend; only "xgrammar" is listed.
@pytest.mark.parametrize("guided_decoding_backend", ["xgrammar"])
def test_structured_output_with_structural_tag(
monkeypatch: pytest.MonkeyPatch,
guided_decoding_backend: str,
):
# Sets VLLM_USE_V1=1 for the duration of this test
# (presumably selects the V1 engine -- TODO confirm).
monkeypatch.setenv("VLLM_USE_V1", "1")

llm = LLM(
model="Qwen/Qwen2.5-1.5B-Instruct",
guided_decoding_backend=guided_decoding_backend,
)

# Structural tag spec: one tag with begin "hello_flag", end "hello" and
# free-text content, triggered by the string "hello".
structural_tag_config = {
"type": "structural_tag",
"format": {
# NOTE(review): the lines from "Copy link" through the reviewer's suggestion
# below are an inline PR review comment captured with the diff, not test code.
Copy link
Collaborator

@chaunceyjiang chaunceyjiang Oct 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest adding some usage examples in the examples directory.

"type": "triggered_tags",
"tags": [
{"begin": "hello_flag", "content": {"type": "any_text"}, "end": "hello"}
],
"triggers": ["hello"],
"stop_after_first": False,
},
}

# The config is passed to GuidedDecodingParams as a JSON string.
# temperature=0.0 is presumably meant to make decoding deterministic -- confirm.
sampling_params = SamplingParams(
temperature=0.0,
max_tokens=500,
guided_decoding=GuidedDecodingParams(
structural_tag=json.dumps(structural_tag_config)
),
)

prompt = "Hello and repeat hello 10 times, do not say anything else. Only say hello hello hello, now start"
outputs = llm.generate(prompt, sampling_params=sampling_params, use_tqdm=True)
assert outputs is not None
for output in outputs:
assert output is not None
assert isinstance(output, RequestOutput)
# NOTE(review): `prompt` is rebound here but not read again in this test.
prompt = output.prompt
generated_text = output.outputs[0].text
assert generated_text is not None
# The tag's begin marker must appear somewhere in the generated text.
assert "hello_flag" in generated_text, (
f"Expected 'hello_flag' to be in generated text, but got: {generated_text}"
)
26 changes: 21 additions & 5 deletions vllm/entrypoints/openai/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ class JsonSchemaResponseFormat(OpenAIBaseModel):
strict: Optional[bool] = None


class StructuralTag(OpenAIBaseModel):
class LegacyStructuralTag(OpenAIBaseModel):
begin: str
# The field is called `schema`, but that name conflicts with pydantic, so
# `structural_tag_schema` is used with an alias instead.
Expand All @@ -197,19 +197,31 @@ class StructuralTag(OpenAIBaseModel):
end: str


class StructuralTagResponseFormat(OpenAIBaseModel):
class LegacyStructuralTagResponseFormat(OpenAIBaseModel):
type: Literal["structural_tag"]
structures: list[StructuralTag]
structures: list[LegacyStructuralTag]
triggers: list[str]


# Response format with type "structural_tag" whose `format` payload is typed
# `Any`, so this model itself places no constraint on the payload's shape.
class StructuralTagResponseFormat(OpenAIBaseModel):
type: Literal["structural_tag"]
format: Any


# Union alias covering both the legacy and the current structural-tag
# response format models.
AnyStructuralTagResponseFormat = Union[
LegacyStructuralTagResponseFormat, StructuralTagResponseFormat
]


# Response format selector; `json_schema` is optional and defaults to None.
class ResponseFormat(OpenAIBaseModel):
# type must be "json_schema", "json_object", or "text"
type: Literal["text", "json_object", "json_schema"]
json_schema: Optional[JsonSchemaResponseFormat] = None


AnyResponseFormat = Union[ResponseFormat, StructuralTagResponseFormat]
AnyResponseFormat = Union[
ResponseFormat, LegacyStructuralTagResponseFormat, StructuralTagResponseFormat
]


class StreamOptions(OpenAIBaseModel):
Expand Down Expand Up @@ -812,7 +824,11 @@ def to_sampling_params(
elif response_format.type == "structural_tag":
structural_tag = response_format
assert structural_tag is not None and isinstance(
structural_tag, StructuralTagResponseFormat
structural_tag,
(
LegacyStructuralTagResponseFormat,
StructuralTagResponseFormat,
),
)
s_tag_obj = structural_tag.model_dump(by_alias=True)
self.structured_outputs.structural_tag = json.dumps(s_tag_obj)
Expand Down
53 changes: 31 additions & 22 deletions vllm/v1/structured_output/backend_xgrammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,18 +114,22 @@ def compile_grammar(
ctx = self.compiler.compile_regex(grammar_spec)
elif request_type == StructuredOutputOptions.STRUCTURAL_TAG:
s_tag = json.loads(grammar_spec)
tags = [
xgr.StructuralTagItem(
begin=s["begin"],
schema=json.dumps(s["schema"]),
end=s["end"],
if "structures" in s_tag:
# Fall back to the deprecated (legacy) way of compiling a structural tag
tags = [
xgr.StructuralTagItem(
begin=s["begin"],
schema=json.dumps(s["schema"]),
end=s["end"],
)
for s in s_tag["structures"]
]
structural_tag = xgr.StructuralTag.from_legacy_structural_tag(
tags, s_tag["triggers"]
)
for s in s_tag["structures"]
]
structural_tag = xgr.StructuralTag.from_legacy_structural_tag(
tags, s_tag["triggers"]
)
ctx = self.compiler.compile_structural_tag(structural_tag)
ctx = self.compiler.compile_structural_tag(structural_tag)
else:
ctx = self.compiler.compile_structural_tag(grammar_spec)
else:
logger.error(
"Validation should have already occurred. Please file an issue."
Expand Down Expand Up @@ -343,17 +347,22 @@ def validate_xgrammar_grammar(sampling_params: SamplingParams) -> None:
if so_params.structural_tag:
try:
s_tag = json.loads(so_params.structural_tag)
tags = [
xgr.StructuralTagItem(
begin=s["begin"],
schema=json.dumps(s["schema"]),
end=s["end"],

# Use the deprecated (legacy) way of compiling a structural tag
if "structures" in s_tag:
tags = [
xgr.StructuralTagItem(
begin=s["begin"],
schema=json.dumps(s["schema"]),
end=s["end"],
)
for s in s_tag["structures"]
]
structural_tag = xgr.StructuralTag.from_legacy_structural_tag(
tags, s_tag["triggers"]
)
for s in s_tag["structures"]
]
structural_tag = xgr.StructuralTag.from_legacy_structural_tag(
tags, s_tag["triggers"]
)
xgr.Grammar.from_structural_tag(structural_tag)
xgr.Grammar.from_structural_tag(structural_tag)
else:
xgr.Grammar.from_structural_tag(so_params.structural_tag)
except Exception as e:
raise ValueError("Invalid structural tag specification.") from e