From a55024e8222ce7720bcb17b47bb670030c1abafc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=A0=84=EC=83=81=EB=B2=94?= Date: Fri, 20 Feb 2026 11:25:53 +0000 Subject: [PATCH 1/2] [OpenAI] Merge system/developer instructions into the tool block for Harmony MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 전상범 --- .../openai/chat_completion/serving.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/vllm/entrypoints/openai/chat_completion/serving.py b/vllm/entrypoints/openai/chat_completion/serving.py index f1af14dd9029..0cdcc3742ed2 100644 --- a/vllm/entrypoints/openai/chat_completion/serving.py +++ b/vllm/entrypoints/openai/chat_completion/serving.py @@ -1951,12 +1951,19 @@ def _make_request_with_harmony( ) messages.append(sys_msg) + merged_instructions = None + if request.messages and ( + request.messages[0]["role"] == "system" + or request.messages[0]["role"] == "developer" + ): + merged_instructions = request.messages[0]["content"] + request.messages.pop(0) # Add developer message. - if request.tools: - dev_msg = get_developer_message( - tools=request.tools if should_include_tools else None # type: ignore[arg-type] - ) - messages.append(dev_msg) + dev_msg = get_developer_message( + instructions=merged_instructions, + tools=request.tools if should_include_tools else None, # type: ignore[arg-type] + ) + messages.append(dev_msg) # Add user message. messages.extend(parse_chat_inputs_to_harmony_messages(request.messages)) From d226cf7321cbf554940496b36479225d2ed6df85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=A0=84=EC=83=81=EB=B2=94?= Date: Fri, 20 Feb 2026 12:12:45 +0000 Subject: [PATCH 2/2] [Frontend] Merge system/developer instructions into the tool block for Harmony MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 전상범 --- .../openai/chat_completion/serving.py | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/vllm/entrypoints/openai/chat_completion/serving.py b/vllm/entrypoints/openai/chat_completion/serving.py index 0cdcc3742ed2..5adf861fa130 100644 --- a/vllm/entrypoints/openai/chat_completion/serving.py +++ b/vllm/entrypoints/openai/chat_completion/serving.py @@ -1951,22 +1951,24 @@ def _make_request_with_harmony( ) messages.append(sys_msg) - merged_instructions = None - if request.messages and ( - request.messages[0]["role"] == "system" - or request.messages[0]["role"] == "developer" - ): - merged_instructions = request.messages[0]["content"] - request.messages.pop(0) + chat_messages = request.messages + merged_instructions: str | None = None + if chat_messages and chat_messages[0]["role"] in ("system", "developer"): + content = chat_messages[0].get("content") + if isinstance(content, str): + merged_instructions = content + chat_messages = chat_messages[1:] + # Add developer message. - dev_msg = get_developer_message( - instructions=merged_instructions, - tools=request.tools if should_include_tools else None, # type: ignore[arg-type] - ) - messages.append(dev_msg) + if request.tools or merged_instructions: + dev_msg = get_developer_message( + instructions=merged_instructions, + tools=request.tools if should_include_tools else None, # type: ignore[arg-type] + ) + messages.append(dev_msg) # Add user message. - messages.extend(parse_chat_inputs_to_harmony_messages(request.messages)) + messages.extend(parse_chat_inputs_to_harmony_messages(chat_messages)) # Render prompt token ids. prompt_token_ids = render_for_completion(messages)