-
-
Notifications
You must be signed in to change notification settings - Fork 15k
[Responses API] Sanitize leaked Harmony control tokens in tool names and recipients #35901
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,6 +15,7 @@ | |
| ReasoningEffort, | ||
| Role, | ||
| StreamableParser, | ||
| StreamState, | ||
| SystemContent, | ||
| TextContent, | ||
| ToolDescription, | ||
|
|
@@ -27,6 +28,126 @@ | |
|
|
||
| logger = init_logger(__name__) | ||
|
|
||
# Harmony special token strings that may leak into tool names or recipients
# during generation by GPT-OSS models.
_HARMONY_SPECIAL_TOKEN_STRS = (
    "<|channel|>",
    "<|constrain|>",
    "<|start|>",
    "<|end|>",
    "<|message|>",
    "<|call|>",
)

# Harmony special token IDs (GPT-OSS encoding)
_TOK_CONSTRAIN = 200003
_TOK_CHANNEL = 200005
_TOK_START = 200006
_TOK_END = 200007
_TOK_MESSAGE = 200008


def sanitize_harmony_name(name: str) -> str:
    """Strip leaked Harmony control tokens from a tool/recipient name.

    Finds the earliest Harmony control token string in *name* and returns
    only the text before it, with trailing whitespace removed. Returns
    the original string unchanged when no control tokens are present.

    Args:
        name: Raw tool or recipient name that may contain leaked tokens.

    Returns:
        The sanitized name (possibly empty if the name starts with a
        control token), or *name* itself when it is already clean.
    """
    # Position of the earliest leaked control token, if any.
    earliest = len(name)
    for tok in _HARMONY_SPECIAL_TOKEN_STRS:
        idx = name.find(tok)
        if idx != -1 and idx < earliest:
            earliest = idx
    if earliest == len(name):
        # No control token present: honor the documented contract and
        # return the input unchanged. (Previously the name was rstripped
        # even on this path, silently altering clean names.)
        return name
    return name[:earliest].rstrip()
|
|
||
|
|
||
class ResilientStreamableParser:
    """``StreamableParser`` wrapper that recovers from two common
    malformed-output patterns produced by GPT-OSS models:

    1. **Missing ``<|start|>`` recovery** -- when the parser expects a
       ``<|start|>`` token but receives ``<|channel|>`` instead, the
       missing ``<|start|>`` plus an assistant role token are injected
       before forwarding the received token.

    2. **Malformed ``<|constrain|>`` in headers** -- when the parser is
       in ``HEADER`` state and receives ``<|constrain|>``, skip mode is
       entered and tokens are discarded until ``<|message|>`` or
       ``<|end|>`` is seen.

    All public properties delegate to the wrapped parser. The
    ``current_recipient`` getter additionally sanitizes leaked control
    tokens.
    """

    def __init__(self, inner: StreamableParser, encoding):
        # Wrapped parser and the encoding used to synthesize role tokens.
        self._inner = inner
        self._encoding = encoding
        # True while discarding tokens after a malformed <|constrain|>.
        self._skipping_malformed_header = False

    # --- error-recovering process() -----------------------------------

    def process(self, token_id: int) -> None:
        """Feed ``token_id`` to the wrapped parser, applying recovery rules."""
        # Pattern 2 (continued): drop tokens until <|message|> / <|end|>.
        if self._skipping_malformed_header:
            if token_id == _TOK_MESSAGE or token_id == _TOK_END:
                self._skipping_malformed_header = False
                self._inner.process(token_id)
            # Anything else is silently discarded while skipping.
            return

        current_state = self._inner.state

        # Pattern 1: <|channel|> arrived where <|start|> was expected.
        if token_id == _TOK_CHANNEL and current_state == StreamState.EXPECT_START:
            # Synthesize the missing <|start|> and an assistant role token.
            self._inner.process(_TOK_START)
            for role_token in self._encoding.encode(
                "assistant", allowed_special="all"
            ):
                self._inner.process(role_token)
            self._inner.process(token_id)
            return

        # Pattern 2 (entry): malformed <|constrain|> while parsing a header.
        if token_id == _TOK_CONSTRAIN and current_state == StreamState.HEADER:
            self._skipping_malformed_header = True
            return

        self._inner.process(token_id)

    # --- delegated properties -----------------------------------------

    @property
    def messages(self):
        return self._inner.messages

    @property
    def current_content(self):
        return self._inner.current_content

    @property
    def current_channel(self):
        return self._inner.current_channel

    @property
    def current_recipient(self):
        # Sanitize leaked tokens; collapse an all-garbage name to None.
        raw = self._inner.current_recipient
        if raw is None:
            return None
        cleaned = sanitize_harmony_name(raw)
        return cleaned if cleaned else None

    @property
    def current_role(self):
        return self._inner.current_role

    @property
    def state(self):
        return self._inner.state

    @property
    def last_content_delta(self):
        return self._inner.last_content_delta
|
|
||
|
|
||
| REASONING_EFFORT = { | ||
| "high": ReasoningEffort.HIGH, | ||
| "medium": ReasoningEffort.MEDIUM, | ||
|
|
@@ -256,7 +377,7 @@ def parse_chat_input_to_harmony_message( | |
|
|
||
| for call in tool_calls: | ||
| func = call.get("function", {}) | ||
| name = func.get("name", "") | ||
| name = sanitize_harmony_name(func.get("name", "")) | ||
| arguments = func.get("arguments", "") or "" | ||
| msg = Message.from_role_and_content(Role.ASSISTANT, arguments) | ||
| msg = msg.with_channel("commentary") | ||
|
|
@@ -328,8 +449,10 @@ def get_stop_tokens_for_assistant_actions() -> list[int]: | |
| return get_encoding().stop_tokens_for_assistant_actions() | ||
|
|
||
|
|
||
def get_streamable_parser_for_assistant() -> ResilientStreamableParser:
    """Return a resilient streaming parser configured for the assistant role."""
    enc = get_encoding()
    return ResilientStreamableParser(
        StreamableParser(enc, role=Role.ASSISTANT), enc
    )
|
|
||
|
|
||
| def parse_output_into_messages(token_ids: Iterable[int]) -> StreamableParser: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,6 +31,7 @@ | |
| get_encoding, | ||
| get_streamable_parser_for_assistant, | ||
| render_for_completion, | ||
| sanitize_harmony_name, | ||
| ) | ||
| from vllm.entrypoints.openai.parser.responses_parser import ( | ||
| get_responses_parser_for_simple_context, | ||
|
|
@@ -669,7 +670,9 @@ def messages(self) -> list: | |
| def need_builtin_tool_call(self) -> bool: | ||
| last_msg = self.messages[-1] | ||
| recipient = last_msg.recipient | ||
| if recipient is None: | ||
| if recipient is not None: | ||
| recipient = sanitize_harmony_name(recipient) | ||
| if not recipient: | ||
| return False | ||
| if recipient.startswith("browser."): | ||
| return "browser" in self.available_tools | ||
|
|
@@ -685,6 +688,8 @@ async def call_tool(self) -> list[Message]: | |
| last_msg = self.messages[-1] | ||
| recipient = last_msg.recipient | ||
| if recipient is not None: | ||
| recipient = sanitize_harmony_name(recipient) | ||
| if recipient: | ||
|
Comment on lines
690
to
+692
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. To remediate this, ensure that the recipient is sanitized before being assigned back to the message object or used as an author name. if recipient is not None:
recipient = sanitize_harmony_name(recipient)
last_msg.recipient = recipient
if recipient: |
||
| if recipient.startswith("browser."): | ||
| return await self.call_search_tool( | ||
| self._tool_sessions["browser"], last_msg | ||
|
|
@@ -708,7 +713,7 @@ async def call_search_tool( | |
| self.called_tools.add("browser") | ||
| if isinstance(tool_session, Tool): | ||
| return await tool_session.get_result(self) | ||
| tool_name = last_msg.recipient.split(".")[1] | ||
| tool_name = sanitize_harmony_name(last_msg.recipient.split(".")[1]) | ||
| if envs.VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY: | ||
| try: | ||
| args = json.loads(last_msg.content[0].text) | ||
|
|
@@ -795,7 +800,9 @@ async def call_container_tool( | |
| self.called_tools.add("container") | ||
| if isinstance(tool_session, Tool): | ||
| return await tool_session.get_result(self) | ||
| tool_name = last_msg.recipient.split(".")[1].split(" ")[0] | ||
| tool_name = sanitize_harmony_name( | ||
| last_msg.recipient.split(".")[1].split(" ")[0] | ||
| ) | ||
| if envs.VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY: | ||
| try: | ||
| args = json.loads(last_msg.content[0].text) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This test verifies that the parser produces two messages, but it only asserts the content of the first message. To ensure the error recovery logic for malformed headers is fully functional, it's important to also assert the content of the second message, which should have been parsed correctly after skipping the garbage tokens.