diff --git a/docs/advanced_features/structured_outputs.ipynb b/docs/advanced_features/structured_outputs.ipynb
index 1382f1e0e28a..61cc97944fc1 100644
--- a/docs/advanced_features/structured_outputs.ipynb
+++ b/docs/advanced_features/structured_outputs.ipynb
@@ -349,6 +349,44 @@
"print_highlight(response.choices[0].message.content)"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Support for XGrammar latest structural tag format\n",
+ "# https://xgrammar.mlc.ai/docs/tutorials/structural_tag.html\n",
+ "\n",
+ "response = client.chat.completions.create(\n",
+ " model=\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
+ " messages=messages,\n",
+ " response_format={\n",
+ " \"type\": \"structural_tag\",\n",
+ " \"format\": {\n",
+ " \"type\": \"triggered_tags\",\n",
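+ " \"triggers\": [\"<function=\"],\n",
+ " \"tags\": [\n",
+ " {\n",
+ " \"begin\": \"<function=get_current_weather>\",\n",
+ " \"content\": schema_get_current_weather,\n",
+ " \"end\": \"</function>\",\n",
+ " },\n",
+ " {\n",
+ " \"begin\": \"<function=get_current_date>\",\n",
+ " \"content\": schema_get_current_date,\n",
+ " \"end\": \"</function>\",\n",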
+ " },\n",
+ " ],\n",
+ " \"at_least_one\": False,\n",
+ " \"stop_after_first\": False,\n",
+ " }\n",
+ " },\n",
+ ")\n",
+ "\n",
+ "print_highlight(response.choices[0].message.content)"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -594,6 +632,50 @@
"print_highlight(response.json())"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Support for XGrammar latest structural tag format\n",
+ "# https://xgrammar.mlc.ai/docs/tutorials/structural_tag.html\n",
+ "\n",
+ "payload = {\n",
+ " \"text\": text,\n",
+ " \"sampling_params\": {\n",
+ " \"structural_tag\": json.dumps(\n",
+ " {\n",
+ " \"type\": \"structural_tag\",\n",
+ " \"format\": {\n",
+ " \"type\": \"triggered_tags\",\n",
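+ " \"triggers\": [\"<function=\"],\n",
+ " \"tags\": [\n",
+ " {\n",
+ " \"begin\": \"<function=get_current_weather>\",\n",
+ " \"content\": schema_get_current_weather,\n",
+ " \"end\": \"</function>\",\n",
+ " },\n",
+ " {\n",
+ " \"begin\": \"<function=get_current_date>\",\n",
+ " \"content\": schema_get_current_date,\n",
+ " \"end\": \"</function>\",\n",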
+ " },\n",
+ " ],\n",
+ " \"at_least_one\": False,\n",
+ " \"stop_after_first\": False,\n",
+ " }\n",
+ " }\n",
+ " )\n",
+ " },\n",
+ "}\n",
+ "\n",
+ "\n",
+ "# Send POST request to the API endpoint\n",
+ "response = requests.post(f\"http://localhost:{port}/generate\", json=payload)\n",
+ "print_highlight(response.json())"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
@@ -825,6 +907,51 @@
" print_highlight(f\"Prompt: {prompt}\\nGenerated text: {output['text']}\")"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Support for XGrammar latest structural tag format\n",
+ "# https://xgrammar.mlc.ai/docs/tutorials/structural_tag.html\n",
+ "\n",
+ "sampling_params = {\n",
+ " \"temperature\": 0.8,\n",
+ " \"top_p\": 0.95,\n",
+ " \"structural_tag\": json.dumps(\n",
+ " {\n",
+ " \"type\": \"structural_tag\",\n",
+ " \"format\": {\n",
+ " \"type\": \"triggered_tags\",\n",
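+ " \"triggers\": [\"<function=\"],\n",
+ " \"tags\": [\n",
+ " {\n",
+ " \"begin\": \"<function=get_current_weather>\",\n",
+ " \"content\": schema_get_current_weather,\n",
+ " \"end\": \"</function>\",\n",
+ " },\n",
+ " {\n",
+ " \"begin\": \"<function=get_current_date>\",\n",
+ " \"content\": schema_get_current_date,\n",
+ " \"end\": \"</function>\",\n",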
+ " },\n",
+ " ],\n",
+ " \"at_least_one\": False,\n",
+ " \"stop_after_first\": False,\n",
+ " }\n",
+ " }\n",
+ " ),\n",
+ "}\n",
+ "\n",
+ "\n",
+ "# Generate a completion for each prompt using the structural-tag sampling params\n",
+ "outputs = llm.generate(prompts, sampling_params)\n",
+ "for prompt, output in zip(prompts, outputs):\n",
+ " print_highlight(\"===============================\")\n",
+ " print_highlight(f\"Prompt: {prompt}\\nGenerated text: {output['text']}\")"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
diff --git a/python/sglang/srt/constrained/xgrammar_backend.py b/python/sglang/srt/constrained/xgrammar_backend.py
index 3f21e854b3f4..eb00f24e2925 100644
--- a/python/sglang/srt/constrained/xgrammar_backend.py
+++ b/python/sglang/srt/constrained/xgrammar_backend.py
@@ -238,17 +238,22 @@ def dispatch_regex(self, key_string: str) -> Optional[XGrammarGrammar]:
def dispatch_structural_tag(self, key_string: str) -> Optional[XGrammarGrammar]:
try:
structural_tag = json.loads(key_string)
- tags = [
- StructuralTagItem(
- begin=structure["begin"],
- schema=json.dumps(structure["schema"]),
- end=structure["end"],
+ if "format" in structural_tag:
+ # V1 format
+ ctx = self.grammar_compiler.compile_structural_tag(structural_tag)
+ else:
+ # Deprecated format
+ tags = [
+ StructuralTagItem(
+ begin=structure["begin"],
+ schema=json.dumps(structure["schema"]),
+ end=structure["end"],
+ )
+ for structure in structural_tag["structures"]
+ ]
+ ctx = self.grammar_compiler.compile_structural_tag(
+ tags, structural_tag["triggers"]
)
- for structure in structural_tag["structures"]
- ]
- ctx = self.grammar_compiler.compile_structural_tag(
- tags, structural_tag["triggers"]
- )
except (RuntimeError, json.decoder.JSONDecodeError) as e:
logging.error(f"Hit invalid structural_tag: {key_string=}, {e=}")
return INVALID_GRAMMAR_OBJ
diff --git a/python/sglang/srt/entrypoints/openai/protocol.py b/python/sglang/srt/entrypoints/openai/protocol.py
index 8111f193980f..af731bd94f75 100644
--- a/python/sglang/srt/entrypoints/openai/protocol.py
+++ b/python/sglang/srt/entrypoints/openai/protocol.py
@@ -126,6 +126,9 @@ class StructuralTagResponseFormat(BaseModel):
structures: List[StructuresResponseFormat]
triggers: List[str]
+class StructuralTagResponseFormatV1(BaseModel):
+ type: Literal["structural_tag"]
+ format: Dict[str, Any]
class FileRequest(BaseModel):
# https://platform.openai.com/docs/api-reference/files/create
@@ -219,7 +222,7 @@ class CompletionRequest(BaseModel):
skip_special_tokens: bool = True
lora_path: Optional[Union[List[Optional[str]], Optional[str]]] = None
session_params: Optional[Dict] = None
- response_format: Optional[Union[ResponseFormat, StructuralTagResponseFormat]] = None
+ response_format: Optional[Union[ResponseFormat, StructuralTagResponseFormat, StructuralTagResponseFormatV1]] = None
# For PD disaggregation
bootstrap_host: Optional[Union[List[str], str]] = None
@@ -432,7 +435,7 @@ class ChatCompletionRequest(BaseModel):
)
n: int = 1
presence_penalty: float = 0.0
- response_format: Optional[Union[ResponseFormat, StructuralTagResponseFormat]] = None
+ response_format: Optional[Union[ResponseFormat, StructuralTagResponseFormat, StructuralTagResponseFormatV1]] = None
seed: Optional[int] = None
stop: Optional[Union[str, List[str]]] = None
stream: bool = False