Skip to content
1 change: 1 addition & 0 deletions python/sglang/srt/constrained/xgrammar_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ def _from_context(self, ctx: CompiledGrammar) -> XGrammarGrammar:
def dispatch_json(self, key_string: str) -> Optional[XGrammarGrammar]:
try:
if key_string == "$$ANY$$":
# Note: This builtin JSON grammar includes *all* valid JSON (including, for example, arrays at the root)
ctx = self.grammar_compiler.compile_builtin_json_grammar()
else:
ctx = self.grammar_compiler.compile_json_schema(schema=key_string)
Expand Down
2 changes: 2 additions & 0 deletions python/sglang/srt/openai_api/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -1039,6 +1039,8 @@ def v1_chat_generate_request(
sampling_params["json_schema"] = convert_json_schema_to_str(
request.response_format.json_schema.schema_
)
elif request.response_format and request.response_format.type == "json_object":
sampling_params["json_schema"] = '{"type": "object"}'
elif (
request.response_format and request.response_format.type == "structural_tag"
):
Expand Down
137 changes: 137 additions & 0 deletions test/srt/test_json_mode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
"""
python3 -m unittest test_json_mode.TestJSONModeOutlines.test_json_mode_response
python3 -m unittest test_json_mode.TestJSONModeOutlines.test_json_mode_with_streaming

python3 -m unittest test_json_mode.TestJSONModeXGrammar.test_json_mode_response
python3 -m unittest test_json_mode.TestJSONModeXGrammar.test_json_mode_with_streaming

python3 -m unittest test_json_mode.TestJSONModeLLGuidance.test_json_mode_response
python3 -m unittest test_json_mode.TestJSONModeLLGuidance.test_json_mode_with_streaming
"""

import json
import unittest

import openai

from sglang.srt.utils import kill_process_tree
from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server,
)


def setup_class(cls, backend):
    """Launch a shared SGLang test server for a test class.

    Stores the model name, base URL, server process handle, and an OpenAI
    client on *cls* so every test method in the class can reuse them.

    Args:
        cls: The ``unittest.TestCase`` subclass being configured.
        backend: Grammar backend name passed via ``--grammar-backend``
            (e.g. ``"outlines"``, ``"xgrammar"``, ``"llguidance"``).
    """
    cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
    cls.base_url = DEFAULT_URL_FOR_TEST

    cls.process = popen_launch_server(
        cls.model,
        cls.base_url,
        timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
        other_args=[
            "--max-running-requests",
            "10",
            "--grammar-backend",
            backend,
        ],
    )
    # The local server ignores the API key, but the client requires one.
    cls.client = openai.Client(api_key="EMPTY", base_url=f"{cls.base_url}/v1")


class TestJSONModeOutlines(unittest.TestCase):
    """End-to-end tests for OpenAI "JSON mode" (response_format={"type": "json_object"}).

    Uses the ``outlines`` grammar backend; subclasses override ``setUpClass``
    to exercise other backends with the same test bodies.
    """

    @classmethod
    def setUpClass(cls):
        setup_class(cls, "outlines")

    @classmethod
    def tearDownClass(cls):
        kill_process_tree(cls.process.pid)

    def _create_chat_completion(self, **extra_kwargs):
        """Issue a json_object chat completion with the shared test prompt.

        The system prompt deliberately does NOT mention JSON, so any valid-JSON
        output must come from the grammar constraint rather than prompting.
        Extra keyword arguments (e.g. ``stream=True``) are forwarded as-is.
        """
        return self.client.chat.completions.create(
            model=self.model,
            messages=[
                # We deliberately omit "that produces JSON" or similar phrases
                # from the system prompt so that we don't get misleading
                # (prompt-induced rather than grammar-induced) test results.
                {
                    "role": "system",
                    "content": "You are a helpful AI assistant that gives a short answer.",
                },
                {"role": "user", "content": "What is the capital of Bulgaria?"},
            ],
            temperature=0,
            max_tokens=128,
            response_format={"type": "json_object"},
            **extra_kwargs,
        )

    def _assert_valid_json_object(self, text, context):
        """Assert *text* parses as JSON and is a JSON object (dict)."""
        try:
            js_obj = json.loads(text)
        except json.JSONDecodeError as e:
            self.fail(f"{context} is not valid JSON. Error: {e}. Response: {text}")
        self.assertIsInstance(js_obj, dict, f"{context} is not a JSON object: {text}")

    def test_json_mode_response(self):
        """Test that response_format json_object (also known as "json mode") produces valid JSON, even without a system prompt that mentions JSON."""
        response = self._create_chat_completion()
        text = response.choices[0].message.content

        print(f"Response ({len(text)} characters): {text}")

        self._assert_valid_json_object(text, "Response")

    def test_json_mode_with_streaming(self):
        """Test that streaming with json_object response (also known as "json mode") format works correctly, even without a system prompt that mentions JSON."""
        stream = self._create_chat_completion(stream=True)

        # Collect all content deltas.
        chunks = []
        for chunk in stream:
            # A streamed chunk may carry an empty choices list (e.g. a final
            # usage-only chunk) -- guard before indexing to avoid IndexError.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta.content
            if delta is not None:
                chunks.append(delta)
        full_response = "".join(chunks)

        print(
            f"Concatenated Response ({len(full_response)} characters): {full_response}"
        )

        self._assert_valid_json_object(full_response, "Streamed response")


class TestJSONModeXGrammar(TestJSONModeOutlines):
    """Run the same JSON-mode tests against the xgrammar backend."""

    @classmethod
    def setUpClass(cls):
        setup_class(cls, "xgrammar")


class TestJSONModeLLGuidance(TestJSONModeOutlines):
    """Run the same JSON-mode tests against the llguidance backend."""

    @classmethod
    def setUpClass(cls):
        setup_class(cls, backend="llguidance")


# Allow running the whole module directly: `python3 test_json_mode.py`.
if __name__ == "__main__":
    unittest.main()
Loading