diff --git a/docs/advanced_features/structured_outputs.ipynb b/docs/advanced_features/structured_outputs.ipynb index 1382f1e0e28a..61cc97944fc1 100644 --- a/docs/advanced_features/structured_outputs.ipynb +++ b/docs/advanced_features/structured_outputs.ipynb @@ -349,6 +349,44 @@ "print_highlight(response.choices[0].message.content)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Support for XGrammar latest structural tag format\n", + "# https://xgrammar.mlc.ai/docs/tutorials/structural_tag.html\n", + "\n", + "response = client.chat.completions.create(\n", + " model=\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n", + " messages=messages,\n", + " response_format={\n", + " \"type\": \"structural_tag\",\n", + " \"format\": {\n", + " \"type\": \"triggered_tags\",\n", + " \"triggers\": [\"\",\n", + " \"content\": schema_get_current_weather,\n", + " \"end\": \"\",\n", + " },\n", + " {\n", + " \"begin\": \"\",\n", + " \"content\": schema_get_current_date,\n", + " \"end\": \"\",\n", + " },\n", + " ],\n", + " \"at_least_one\": False,\n", + " \"stop_after_first\": False,\n", + " }\n", + " },\n", + ")\n", + "\n", + "print_highlight(response.choices[0].message.content)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -594,6 +632,50 @@ "print_highlight(response.json())" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Support for XGrammar latest structural tag format\n", + "# https://xgrammar.mlc.ai/docs/tutorials/structural_tag.html\n", + "\n", + "payload = {\n", + " \"text\": text,\n", + " \"sampling_params\": {\n", + " \"structural_tag\": json.dumps(\n", + " {\n", + " \"type\": \"structural_tag\",\n", + " \"format\": {\n", + " \"type\": \"triggered_tags\",\n", + " \"triggers\": [\"\",\n", + " \"content\": schema_get_current_weather,\n", + " \"end\": \"\",\n", + " },\n", + " {\n", + " \"begin\": \"\",\n", + " \"content\": schema_get_current_date,\n", + " \"end\": \"\",\n", + " },\n", + " ],\n", + " \"at_least_one\": False,\n", + " \"stop_after_first\": False,\n", + " }\n", + " }\n", + " )\n", + " },\n", + "}\n", + "\n", + "\n", + "# Send POST request to the API endpoint\n", + "response = requests.post(f\"http://localhost:{port}/generate\", json=payload)\n", + "print_highlight(response.json())" + ] + }, { "cell_type": "code", "execution_count": null, @@ -825,6 +907,51 @@ " print_highlight(f\"Prompt: {prompt}\\nGenerated text: {output['text']}\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Support for XGrammar latest structural tag format\n", + "# https://xgrammar.mlc.ai/docs/tutorials/structural_tag.html\n", + "\n", + "sampling_params = {\n", + " \"temperature\": 0.8,\n", + " \"top_p\": 0.95,\n", + " \"structural_tag\": json.dumps(\n", + " {\n", + " \"type\": \"structural_tag\",\n", + " \"format\": {\n", + " \"type\": \"triggered_tags\",\n", + " \"triggers\": [\"\",\n", + " \"content\": schema_get_current_weather,\n", + " \"end\": \"\",\n", + " },\n", + " {\n", + " \"begin\": \"\",\n", + " \"content\": schema_get_current_date,\n", + " \"end\": \"\",\n", + " },\n", + " ],\n", + " \"at_least_one\": False,\n", + " \"stop_after_first\": False,\n", + " }\n", + " }\n", + " ),\n", + "}\n", + "\n", + "\n", + "# Send POST request to the API endpoint\n", + "outputs = llm.generate(prompts, sampling_params)\n", + "for prompt, output in zip(prompts, outputs):\n", + " print_highlight(\"===============================\")\n", + " print_highlight(f\"Prompt: {prompt}\\nGenerated text: {output['text']}\")" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/python/sglang/srt/constrained/xgrammar_backend.py b/python/sglang/srt/constrained/xgrammar_backend.py index 3f21e854b3f4..eb00f24e2925 100644 --- a/python/sglang/srt/constrained/xgrammar_backend.py +++ b/python/sglang/srt/constrained/xgrammar_backend.py @@ -238,17 +238,22 @@ def dispatch_regex(self, key_string: str) -> Optional[XGrammarGrammar]: def dispatch_structural_tag(self, key_string: str) -> Optional[XGrammarGrammar]: try: structural_tag = json.loads(key_string) - tags = [ - StructuralTagItem( - begin=structure["begin"], - schema=json.dumps(structure["schema"]), - end=structure["end"], + if "format" in structural_tag: + # V1 format + ctx = self.grammar_compiler.compile_structural_tag(structural_tag) + else: + # Deprecated format + tags = [ + StructuralTagItem( + begin=structure["begin"], + schema=json.dumps(structure["schema"]), + end=structure["end"], + ) + for structure in structural_tag["structures"] + ] + ctx = self.grammar_compiler.compile_structural_tag( + tags, structural_tag["triggers"] ) - for structure in structural_tag["structures"] - ] - ctx = self.grammar_compiler.compile_structural_tag( - tags, structural_tag["triggers"] - ) except (RuntimeError, json.decoder.JSONDecodeError) as e: logging.error(f"Hit invalid structural_tag: {key_string=}, {e=}") return INVALID_GRAMMAR_OBJ diff --git a/python/sglang/srt/entrypoints/openai/protocol.py b/python/sglang/srt/entrypoints/openai/protocol.py index 8111f193980f..af731bd94f75 100644 --- a/python/sglang/srt/entrypoints/openai/protocol.py +++ b/python/sglang/srt/entrypoints/openai/protocol.py @@ -126,6 +126,9 @@ class StructuralTagResponseFormat(BaseModel): structures: List[StructuresResponseFormat] triggers: List[str] +class StructuralTagResponseFormatV1(BaseModel): + type: Literal["structural_tag"] + format: Dict[str, Any] class FileRequest(BaseModel): # https://platform.openai.com/docs/api-reference/files/create @@ -219,7 +222,7 @@ class CompletionRequest(BaseModel): skip_special_tokens: bool = True lora_path: Optional[Union[List[Optional[str]], Optional[str]]] = None session_params: Optional[Dict] = None - response_format: Optional[Union[ResponseFormat, StructuralTagResponseFormat]] = None + response_format: Optional[Union[ResponseFormat, StructuralTagResponseFormat, StructuralTagResponseFormatV1]] = None # For PD disaggregation bootstrap_host: Optional[Union[List[str], str]] = None @@ -432,7 +435,7 @@ class ChatCompletionRequest(BaseModel): ) n: int = 1 presence_penalty: float = 0.0 - response_format: Optional[Union[ResponseFormat, StructuralTagResponseFormat]] = None + response_format: Optional[Union[ResponseFormat, StructuralTagResponseFormat, StructuralTagResponseFormatV1]] = None seed: Optional[int] = None stop: Optional[Union[str, List[str]]] = None stream: bool = False