diff --git a/tests/entrypoints/openai/test_gptoss_structural_tags_integration.py b/tests/entrypoints/openai/test_gptoss_structural_tags_integration.py
index 2c481cc711dc..47f841540eba 100644
--- a/tests/entrypoints/openai/test_gptoss_structural_tags_integration.py
+++ b/tests/entrypoints/openai/test_gptoss_structural_tags_integration.py
@@ -23,6 +23,7 @@ def mock_tokenizer(self):
         """Create a mock tokenizer."""
         tokenizer = Mock()
         tokenizer.encode = Mock(return_value=[1, 2, 3, 4, 5])
+        tokenizer.vocab = {"<|end|>": 6}
         return tokenizer
 
     @pytest.fixture
diff --git a/tests/reasoning/test_gptoss_reasoning_parser.py b/tests/reasoning/test_gptoss_reasoning_parser.py
index 873135d5717f..6013fa642edd 100644
--- a/tests/reasoning/test_gptoss_reasoning_parser.py
+++ b/tests/reasoning/test_gptoss_reasoning_parser.py
@@ -17,7 +17,9 @@ def gpt_oss_tokenizer():
 USER_MESSAGE_START = "<|start|>user<|message|>"
 REASONING_SECTION_START = "<|end|><|start|>assistant<|channel|>analysis<|message|>"
-ASSISTANT_CONTENT_START_PREFIX = "<|end|><|start|>assistant<|channel|>final"
+END = "<|end|>"
+ASSISTANT_START = "<|start|>assistant"
+ASSISTANT_CONTENT_START_PREFIX = END + ASSISTANT_START + "<|channel|>final"
 ASSISTANT_CONTENT_START_SUFFIX = "<|message|>"
 ASSISTANT_CONTENT_START = (
     ASSISTANT_CONTENT_START_PREFIX + ASSISTANT_CONTENT_START_SUFFIX
@@ -97,6 +99,20 @@ def gpt_oss_tokenizer():
     "is_reasoning_end": True,
 }
 
+MULTI_TURN_CONTENT = {
+    "output": USER_MESSAGE_START
+    + "1st turn user message"
+    + REASONING_SECTION_START
+    + "1st turn reasoning"
+    + ASSISTANT_CONTENT_START
+    + "1st turn response"
+    + END
+    + USER_MESSAGE_START
+    + "2nd turn user message"
+    + END
+    + ASSISTANT_START,
+    "is_reasoning_end": False,
+}
 TEST_CASES = [
     BASIC_CONTENT,
     BASIC_REASONING_ONLY,
@@ -106,6 +122,7 @@ def gpt_oss_tokenizer():
     COMPLEX_CONTENT_1,
     COMPLEX_CONTENT_1_WITH_CONTENT,
     COMPLEX_CONTENT_2,
+    MULTI_TURN_CONTENT,
 ]
 
 
diff --git a/tests/v1/structured_output/test_gptoss_structural_tags.py b/tests/v1/structured_output/test_gptoss_structural_tags.py
index 0d49487302f4..fafa9d8ed465 100644
--- a/tests/v1/structured_output/test_gptoss_structural_tags.py
+++ b/tests/v1/structured_output/test_gptoss_structural_tags.py
@@ -25,6 +25,7 @@ def mock_tokenizer(self):
         """Create a mock tokenizer for testing."""
         tokenizer = Mock()
         tokenizer.encode = Mock(return_value=[1, 2, 3, 4, 5])
+        tokenizer.vocab = {"<|end|>": 6}
         return tokenizer
 
     @pytest.fixture
diff --git a/vllm/reasoning/gptoss_reasoning_parser.py b/vllm/reasoning/gptoss_reasoning_parser.py
index 186c4e5c7f98..599392e36374 100644
--- a/vllm/reasoning/gptoss_reasoning_parser.py
+++ b/vllm/reasoning/gptoss_reasoning_parser.py
@@ -76,6 +76,9 @@ def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
             "<|channel|>final"
         )
         self.reasoning_end_token_ids_suffix = self.model_tokenizer.encode("<|message|>")
+        # We also need to check for the <|end|> token to avoid false positives from
+        # previous messages in multi-turn conversations.
+        self.eom_token_id = self.model_tokenizer.vocab["<|end|>"]
         self.reasoning_max_num_between_tokens = 20
 
     def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
@@ -86,6 +89,12 @@ def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
         # Check if the end sequence is present in the input_ids.
         # We search from the end of input_ids to find the last match.
         for i in range(len(input_ids) - len(end_token_ids_prefix), -1, -1):
+            if input_ids[i] == self.eom_token_id:
+                # We looped backwards far enough to find the end of a previous message,
+                # which means we have searched the entirety of the current message
+                # and can exit early without searching further back into prior
+                # messages of the conversation.
+                return False
             if input_ids[i : i + len(end_token_ids_prefix)] == end_token_ids_prefix:
                 # We have found the prefix, now we look for the suffix after the prefix.
                 suffix_start = i + len(end_token_ids_prefix)