Patch summary (free-text commentary; everything before the first "diff --git"
header is not part of any hunk).

  * tests/v1/entrypoints/llm/test_struct_output_generate.py
      Adds test_structured_output_with_structural_tag, which sends a
      "structural_tag" config whose "format" is of type "triggered_tags" and
      asserts that "hello_flag" appears in the generated text.
  * vllm/entrypoints/openai/protocol.py
      Renames StructuralTag / StructuralTagResponseFormat to
      LegacyStructuralTag / LegacyStructuralTagResponseFormat, adds a new
      StructuralTagResponseFormat carrying a free-form "format" field, and
      accepts both classes in AnyResponseFormat and in to_sampling_params.
  * vllm/v1/structured_output/backend_xgrammar.py
      When the parsed spec contains a "structures" key the legacy
      StructuralTagItem path is used; otherwise the raw JSON string is handed
      to xgrammar unchanged.

NOTE(review): line structure was restored from a whitespace-collapsed copy.
Line breaks agree with every hunk header count; leading indentation of the
context lines was inferred from Python syntax - confirm against the target
files before applying.

diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py
index b5d04679317e..d1721692806b 100644
--- a/tests/v1/entrypoints/llm/test_struct_output_generate.py
+++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py
@@ -864,3 +864,49 @@ def test_structured_output_batched_with_non_structured_outputs_requests(
             # non-structured outputs requests should not return a valid JSON here
             with pytest.raises(ValueError):
                 output_json = json.loads(generated_text)
+
+
+@pytest.mark.parametrize("guided_decoding_backend", ["xgrammar"])
+def test_structured_output_with_structural_tag(
+    monkeypatch: pytest.MonkeyPatch,
+    guided_decoding_backend: str,
+):
+    monkeypatch.setenv("VLLM_USE_V1", "1")
+
+    llm = LLM(
+        model="Qwen/Qwen2.5-1.5B-Instruct",
+        guided_decoding_backend=guided_decoding_backend,
+    )
+
+    structural_tag_config = {
+        "type": "structural_tag",
+        "format": {
+            "type": "triggered_tags",
+            "tags": [
+                {"begin": "hello_flag", "content": {"type": "any_text"}, "end": "hello"}
+            ],
+            "triggers": ["hello"],
+            "stop_after_first": False,
+        },
+    }
+
+    sampling_params = SamplingParams(
+        temperature=0.0,
+        max_tokens=500,
+        guided_decoding=GuidedDecodingParams(
+            structural_tag=json.dumps(structural_tag_config)
+        ),
+    )
+
+    prompt = "Hello and repeat hello 10 times, do not say anything else. Only say hello hello hello, now start"
+    outputs = llm.generate(prompt, sampling_params=sampling_params, use_tqdm=True)
+    assert outputs is not None
+    for output in outputs:
+        assert output is not None
+        assert isinstance(output, RequestOutput)
+        prompt = output.prompt
+        generated_text = output.outputs[0].text
+        assert generated_text is not None
+        assert "hello_flag" in generated_text, (
+            f"Expected 'hello_flag' to be in generated text, but got: {generated_text}"
+        )
diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 6ff7ceef4805..257da2879f64 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -187,7 +187,7 @@ class JsonSchemaResponseFormat(OpenAIBaseModel):
     strict: Optional[bool] = None
 
 
-class StructuralTag(OpenAIBaseModel):
+class LegacyStructuralTag(OpenAIBaseModel):
     begin: str
     # schema is the field, but that causes conflicts with pydantic so
     # instead use structural_tag_schema with an alias
@@ -197,19 +197,31 @@ class StructuralTag(OpenAIBaseModel):
     end: str
 
 
-class StructuralTagResponseFormat(OpenAIBaseModel):
+class LegacyStructuralTagResponseFormat(OpenAIBaseModel):
     type: Literal["structural_tag"]
-    structures: list[StructuralTag]
+    structures: list[LegacyStructuralTag]
     triggers: list[str]
 
 
+class StructuralTagResponseFormat(OpenAIBaseModel):
+    type: Literal["structural_tag"]
+    format: Any
+
+
+AnyStructuralTagResponseFormat = Union[
+    LegacyStructuralTagResponseFormat, StructuralTagResponseFormat
+]
+
+
 class ResponseFormat(OpenAIBaseModel):
     # type must be "json_schema", "json_object", or "text"
     type: Literal["text", "json_object", "json_schema"]
     json_schema: Optional[JsonSchemaResponseFormat] = None
 
 
-AnyResponseFormat = Union[ResponseFormat, StructuralTagResponseFormat]
+AnyResponseFormat = Union[
+    ResponseFormat, LegacyStructuralTagResponseFormat, StructuralTagResponseFormat
+]
 
 
 class StreamOptions(OpenAIBaseModel):
@@ -812,7 +824,11 @@ def to_sampling_params(
             elif response_format.type == "structural_tag":
                 structural_tag = response_format
                 assert structural_tag is not None and isinstance(
-                    structural_tag, StructuralTagResponseFormat
+                    structural_tag,
+                    (
+                        LegacyStructuralTagResponseFormat,
+                        StructuralTagResponseFormat,
+                    ),
                 )
                 s_tag_obj = structural_tag.model_dump(by_alias=True)
                 self.structured_outputs.structural_tag = json.dumps(s_tag_obj)
diff --git a/vllm/v1/structured_output/backend_xgrammar.py b/vllm/v1/structured_output/backend_xgrammar.py
index 9f81d09633d7..d93be4f9adec 100644
--- a/vllm/v1/structured_output/backend_xgrammar.py
+++ b/vllm/v1/structured_output/backend_xgrammar.py
@@ -114,18 +114,22 @@ def compile_grammar(
             ctx = self.compiler.compile_regex(grammar_spec)
         elif request_type == StructuredOutputOptions.STRUCTURAL_TAG:
             s_tag = json.loads(grammar_spec)
-            tags = [
-                xgr.StructuralTagItem(
-                    begin=s["begin"],
-                    schema=json.dumps(s["schema"]),
-                    end=s["end"],
+            if "structures" in s_tag:
+                # Falling back to deprecated method of compiling structural tag
+                tags = [
+                    xgr.StructuralTagItem(
+                        begin=s["begin"],
+                        schema=json.dumps(s["schema"]),
+                        end=s["end"],
+                    )
+                    for s in s_tag["structures"]
+                ]
+                structural_tag = xgr.StructuralTag.from_legacy_structural_tag(
+                    tags, s_tag["triggers"]
                 )
-                for s in s_tag["structures"]
-            ]
-            structural_tag = xgr.StructuralTag.from_legacy_structural_tag(
-                tags, s_tag["triggers"]
-            )
-            ctx = self.compiler.compile_structural_tag(structural_tag)
+                ctx = self.compiler.compile_structural_tag(structural_tag)
+            else:
+                ctx = self.compiler.compile_structural_tag(grammar_spec)
         else:
             logger.error(
                 "Validation should have already occurred. Please file an issue."
@@ -343,17 +347,22 @@ def validate_xgrammar_grammar(sampling_params: SamplingParams) -> None:
     if so_params.structural_tag:
         try:
             s_tag = json.loads(so_params.structural_tag)
-            tags = [
-                xgr.StructuralTagItem(
-                    begin=s["begin"],
-                    schema=json.dumps(s["schema"]),
-                    end=s["end"],
+
+            # Using the deprecated method of compiling structural tag
+            if "structures" in s_tag:
+                tags = [
+                    xgr.StructuralTagItem(
+                        begin=s["begin"],
+                        schema=json.dumps(s["schema"]),
+                        end=s["end"],
+                    )
+                    for s in s_tag["structures"]
+                ]
+                structural_tag = xgr.StructuralTag.from_legacy_structural_tag(
+                    tags, s_tag["triggers"]
                 )
-                for s in s_tag["structures"]
-            ]
-            structural_tag = xgr.StructuralTag.from_legacy_structural_tag(
-                tags, s_tag["triggers"]
-            )
-            xgr.Grammar.from_structural_tag(structural_tag)
+                xgr.Grammar.from_structural_tag(structural_tag)
+            else:
+                xgr.Grammar.from_structural_tag(so_params.structural_tag)
         except Exception as e:
             raise ValueError("Invalid structural tag specification.") from e