Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 18 additions & 8 deletions python/sglang/srt/entrypoints/openai/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -588,12 +588,13 @@ class ChatCompletionRequest(BaseModel):
return_hidden_states: bool = False
return_routed_experts: bool = False
return_cached_tokens_details: bool = False
reasoning_effort: Optional[Literal["low", "medium", "high"]] = Field(
reasoning_effort: Optional[Literal["none", "low", "medium", "high"]] = Field(
default="medium",
description="Constrains effort on reasoning for reasoning models. "
"'low' is the least effort, 'high' is the most effort. Reducing reasoning effort can "
"result in faster responses and fewer tokens used on reasoning in a response. "
"Currently only supported for OpenAI models in the harmony path, i.e GPT-OSS models.",
"'none' disables reasoning entirely, 'low' is the least effort, 'high' is the most effort. "
"Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning "
"in a response. 'none' defaults thinking and enable_thinking to false in "
"chat_template_kwargs (unless explicitly overridden). Not supported in the harmony path.",
)

# Extra parameters for SRT backend only and will be ignored by OpenAI models.
Expand Down Expand Up @@ -672,12 +673,10 @@ def set_tool_choice_default(cls, values):
@classmethod
def normalize_reasoning_inputs(cls, values: Dict):
r = values.get("reasoning")
if r is None:
return values

if isinstance(r, dict):
if r is not None and isinstance(r, dict):
effort = r.get("effort") or r.get("reasoning_effort")
if effort in {"low", "medium", "high"}:
if effort in {"none", "low", "medium", "high"}:
values["reasoning_effort"] = effort

enabled = (
Expand All @@ -694,6 +693,17 @@ def normalize_reasoning_inputs(cls, values: Dict):
ctk.setdefault("thinking", True)
values["chat_template_kwargs"] = ctk

if values.get("reasoning_effort") == "none":
ctk = values.get("chat_template_kwargs")
if not isinstance(ctk, dict):
ctk = {}
# different models check different keys:
# - "thinking" for deepseek-v3, kimi_k2
# - "enable_thinking" for qwen3, glm45, nemotron_3, interns1
ctk.setdefault("thinking", False)
ctk.setdefault("enable_thinking", False)
values["chat_template_kwargs"] = ctk

return values

@model_validator(mode="before")
Expand Down
5 changes: 5 additions & 0 deletions python/sglang/srt/entrypoints/openai/serving_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,11 @@ def _convert_to_internal_request(
if request.chat_template_kwargs
else None
)
if self.is_gpt_oss and reasoning_effort == "none":
raise ValueError(
f"Harmony does not support reasoning effort {reasoning_effort}"
)

if reasoning_effort is not None:
request.reasoning_effort = reasoning_effort

Expand Down
24 changes: 24 additions & 0 deletions test/registered/openai_server/basic/test_protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,30 @@ def test_chat_completion_reasoning_effort(self):
self.assertEqual(request.reasoning_effort, "high")
self.assertEqual(request.chat_template_kwargs, {"thinking": True})

def test_chat_completion_reasoning_effort_none(self):
    """Test reasoning_effort='none' disables thinking.

    Passing ``reasoning_effort="none"`` directly should leave the field
    set to ``"none"`` and populate ``chat_template_kwargs`` with both
    ``thinking`` and ``enable_thinking`` explicitly set to ``False``.
    """
    messages = [{"role": "user", "content": "Hello"}]
    request = ChatCompletionRequest(
        model="test-model",
        messages=messages,
        reasoning_effort="none",
    )
    self.assertEqual(request.reasoning_effort, "none")

    template_kwargs = request.chat_template_kwargs
    # Use assertIs(..., False) instead of assertFalse: .get() returns None
    # for a missing key, which assertFalse would accept, so the weaker
    # check could not detect that a key was never populated.
    self.assertIs(template_kwargs.get("thinking"), False)
    self.assertIs(template_kwargs.get("enable_thinking"), False)

def test_chat_completion_reasoning_effort_none_from_reasoning_dict(self):
    """Test reasoning_effort='none' via nested reasoning dict.

    Supplying ``reasoning={"effort": "none"}`` should be normalized into
    ``reasoning_effort == "none"`` and should populate
    ``chat_template_kwargs`` with both ``thinking`` and
    ``enable_thinking`` explicitly set to ``False``.
    """
    messages = [{"role": "user", "content": "Hello"}]
    request = ChatCompletionRequest(
        model="test-model",
        messages=messages,
        reasoning={"effort": "none"},
    )
    self.assertEqual(request.reasoning_effort, "none")

    template_kwargs = request.chat_template_kwargs
    # Use assertIs(..., False) instead of assertFalse: .get() returns None
    # for a missing key, which assertFalse would accept, so the weaker
    # check could not detect that a key was never populated.
    self.assertIs(template_kwargs.get("thinking"), False)
    self.assertIs(template_kwargs.get("enable_thinking"), False)

def test_chat_completion_json_format(self):
"""Test chat completion json format"""
transcript = "Good morning! It's 7:00 AM, and I'm just waking up. Today is going to be a busy day, "
Expand Down
Loading