Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion requirements/common.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ pybase64 # fast base64 implementation
cbor2 # Required for cross-language serialization of hashable objects
ijson # Required for mistral streaming tool parser
setproctitle # Used to set process names for better debugging and monitoring
openai-harmony >= 0.0.3 # Required for gpt-oss
openai-harmony >= 0.0.8 # Required for gpt-oss
anthropic >= 0.71.0
model-hosting-container-standards >= 0.1.13, < 1.0.0
mcp
Expand Down
2 changes: 1 addition & 1 deletion requirements/test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -624,7 +624,7 @@ omegaconf==2.3.0
# lightning
open-clip-torch==2.32.0
# via -r requirements/test.in
openai-harmony==0.0.4
openai-harmony==0.0.8
# via gpt-oss
opencensus==0.11.4
# via ray
Expand Down
24 changes: 24 additions & 0 deletions tests/entrypoints/openai/parser/test_harmony_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from vllm.entrypoints.openai.parser.harmony_utils import (
auto_drop_analysis_messages,
get_encoding,
get_streamable_parser_for_assistant,
get_system_message,
has_custom_tools,
parse_chat_input_to_harmony_message,
Expand Down Expand Up @@ -928,3 +929,26 @@ def test_reasoning_with_empty_content_returns_none(self):
msg = response_input_to_harmony(item, prev_responses=[])

assert msg is None


def test_malformed_refusal_message() -> None:
    """Regression test for a malformed refusal transcript gpt-oss can emit.

    The transcript restarts the assistant turn mid-stream; with a
    non-strict parser this must still decode into three messages.
    """
    malformed_output = (
        "...\n\nAccording to policy, we must refuse.<|end|>"
        "<|start|>assistant<|channel|>analysis<|message|>We must refuse.<|end|>"
        "<|start|>assistant<|channel|>final<|message|>I can't help with that.<|end|>"
    )
    token_ids = get_encoding().encode(malformed_output, allowed_special="all")

    parser = get_streamable_parser_for_assistant()
    for token_id in token_ids:
        parser.process(token_id)

    messages = parser.messages
    assert len(messages) == 3

    first, analysis, final = messages

    assert first.author.role == Role.ASSISTANT
    # Substring match tolerates whitespace variance around the refusal text.
    assert "According to policy, we must refuse." in first.content[0].text

    assert analysis.author.role == Role.ASSISTANT
    assert analysis.channel == "analysis"
    assert analysis.content[0].text == "We must refuse."

    assert final.author.role == Role.ASSISTANT
    assert final.channel == "final"
    assert final.content[0].text == "I can't help with that."
2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/parser/harmony_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ def get_stop_tokens_for_assistant_actions() -> list[int]:


def get_streamable_parser_for_assistant() -> StreamableParser:
    """Build a harmony ``StreamableParser`` primed for assistant output.

    ``strict=False`` relaxes the parser's validation — presumably so that
    malformed assistant transcripts (e.g. a restarted turn) do not raise;
    confirm against the openai-harmony documentation.
    """
    encoding = get_encoding()
    return StreamableParser(encoding, role=Role.ASSISTANT, strict=False)


def parse_output_into_messages(token_ids: Iterable[int]) -> StreamableParser:
Expand Down
Loading