Skip to content

Commit c7e56ac

Browse files
author
Andrew Xia
committed
Don't use sentence types
Signed-off-by: Andrew Xia <[email protected]>
1 parent 9de7a87 commit c7e56ac

File tree

4 files changed

+54
-30
lines changed

4 files changed

+54
-30
lines changed

vllm/entrypoints/chat_utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@
3636
)
3737
from openai.types.chat.chat_completion_content_part_input_audio_param import InputAudio
3838
from openai.types.responses import ResponseInputImageParam
39+
from openai.types.responses.response_reasoning_item import (
40+
Content as ResponseReasoningTextContent,
41+
)
3942
from openai_harmony import Message as OpenAIHarmonyMessage
4043
from PIL import Image
4144
from pydantic import BaseModel, ConfigDict, TypeAdapter
@@ -216,6 +219,7 @@ class CustomThinkCompletionContentParam(TypedDict, total=False):
216219
| CustomChatCompletionContentSimpleVideoParam
217220
| str
218221
| CustomThinkCompletionContentParam
222+
| ResponseReasoningTextContent
219223
)
220224

221225

vllm/entrypoints/context.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,9 @@ def append_output(self, output) -> None:
223223
for token_id in output_token_ids:
224224
self.parser.process(token_id)
225225

226+
def append_tool_output(self, output) -> None:
227+
raise NotImplementedError("Should not be called.")
228+
226229
def need_builtin_tool_call(self) -> bool:
227230
return False
228231

vllm/entrypoints/openai/parser/parser.py

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,14 @@
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33
import logging
44

5-
from vllm.entrypoints.openai.parser.sentence import Author, Role, Sentence, TextContent
5+
from openai.types.chat.chat_completion_content_part_text_param import (
6+
ChatCompletionContentPartTextParam,
7+
)
8+
from openai.types.responses.response_reasoning_item import (
9+
Content as ResponseReasoningTextContent,
10+
)
11+
12+
from vllm.entrypoints.chat_utils import CustomChatCompletionMessageParam
613
from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
714

815
logger = logging.getLogger(__name__)
@@ -12,17 +19,20 @@ class StreamableParser:
1219
"""Incremental parser over completion tokens with reasoning support."""
1320

1421
def __init__(self, *, tokenizer, reasoning_parser: ReasoningParser):
15-
self.sentences: list[Sentence] = []
22+
self.chat_completion_messages: list[CustomChatCompletionMessageParam] = []
1623
self.tokens: list[int] = []
1724
self.tokenizer = tokenizer
1825

1926
# Instantiate the reasoning parser for this tokenizer
2027
self.reasoning_parser_instance = reasoning_parser(tokenizer)
2128

2229
# start like this
23-
self.current_role = Role.ASSISTANT
24-
self.current_sentence = Sentence(
25-
author=Author(role=self.current_role), content=[]
30+
self.current_role = "assistant"
31+
# self.current_sentence = Sentence(
32+
# author=Author(role=self.current_role), content=[]
33+
# )
34+
self.current_chat_completion_message = CustomChatCompletionMessageParam(
35+
role=self.current_role, content=[]
2636
)
2737
self.current_channel = "think"
2838
self.current_text = ""
@@ -38,20 +48,27 @@ def process(self, token: int) -> "StreamableParser":
3848
self.tokens.append(token)
3949
decoded = self.tokenizer.decode(token)
4050
if self.reasoning_parser_instance.is_reasoning_end([token]):
41-
new_content = TextContent(
42-
text=self.current_text, channel=self.current_channel
51+
# TODO: how to capture reasoning?
52+
# new_content = {
53+
# "role": "assistant",
54+
# "reasoning_content": self.current_text
55+
# }
56+
57+
new_content = ResponseReasoningTextContent(
58+
text=self.current_text, type="reasoning_text"
4359
)
44-
self.current_sentence.content.append(new_content)
60+
61+
self.current_chat_completion_message["content"].append(new_content)
4562

4663
self.current_text = ""
4764
self.current_channel = "final"
4865
elif token == self.tokenizer.eos_token_id:
4966
# EOS token reached: close out the current message
50-
new_content = TextContent(
51-
text=self.current_text, channel=self.current_channel
67+
new_content = ChatCompletionContentPartTextParam(
68+
text=self.current_text, type="text"
5269
)
53-
self.current_sentence.content.append(new_content)
54-
self.sentences.append(self.current_sentence)
70+
self.current_chat_completion_message["content"].append(new_content)
71+
self.chat_completion_messages.append(self.current_chat_completion_message)
5572

5673
self.current_text = ""
5774
self.current_channel = None

vllm/entrypoints/openai/serving_responses.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
from vllm.entrypoints.chat_utils import (
5656
ChatCompletionMessageParam,
5757
ChatTemplateContentFormatOption,
58+
CustomChatCompletionMessageParam,
5859
)
5960
from vllm.entrypoints.context import (
6061
ConversationContext,
@@ -76,7 +77,6 @@
7677
render_for_completion,
7778
)
7879
from vllm.entrypoints.logger import RequestLogger
79-
from vllm.entrypoints.openai.parser.sentence import Sentence
8080
from vllm.entrypoints.openai.protocol import (
8181
DeltaMessage,
8282
ErrorResponse,
@@ -271,7 +271,9 @@ async def create_responses(
271271
| ErrorResponse
272272
):
273273
error_check_ret = await self._check_model(request)
274-
import fbvscode; fbvscode.set_trace()
274+
import fbvscode
275+
276+
fbvscode.set_trace()
275277
if error_check_ret is not None:
276278
logger.error("Error with model %s", error_check_ret)
277279
return error_check_ret
@@ -611,9 +613,9 @@ async def responses_full_generator(
611613
else:
612614
status = "incomplete"
613615
elif isinstance(context, ParsableContext):
614-
sentences = context.parser.sentences
616+
chat_completion_messages = context.parser.chat_completion_messages
615617
output = self._make_response_output_items_from_parsable_context(
616-
request, sentences
618+
request, chat_completion_messages
617619
)
618620

619621
# TODO: context for non-gptoss models doesn't use messages
@@ -784,7 +786,9 @@ def _create_stream_response_logprobs(
784786
]
785787

786788
def _make_response_output_items_from_parsable_context(
787-
self, request: ResponsesRequest, sentences: list[Sentence]
789+
self,
790+
request: ResponsesRequest,
791+
chat_completion_messages: list[CustomChatCompletionMessageParam],
788792
) -> list[ResponseOutputItem]:
789793
"""Given a list of sentences, construct ResponseOutput Items.
790794
@@ -795,29 +799,25 @@ def _make_response_output_items_from_parsable_context(
795799
"""
796800
output_items: list[ResponseOutputItem] = []
797801

798-
for sentence in sentences:
799-
for text_content in sentence.content:
800-
channel = text_content.channel
801-
text = text_content.text
802-
803-
if channel == "think" or channel == "analysis":
802+
for sentence in chat_completion_messages:
803+
for text_content in sentence["content"]:
804+
if isinstance(text_content, ResponseReasoningTextContent):
804805
# Reasoning content
805806
reasoning_item = ResponseReasoningItem(
806807
id=f"rs_{random_uuid()}",
807808
summary=[],
808809
type="reasoning",
809-
content=[
810-
ResponseReasoningTextContent(
811-
text=text, type="reasoning_text"
812-
)
813-
],
810+
content=[text_content],
814811
status="completed",
815812
)
816813
output_items.append(reasoning_item)
817-
elif channel == "final":
814+
elif (
815+
isinstance(text_content, dict)
816+
and text_content.get("type") == "text"
817+
):
818818
# Final output content
819819
output_text = ResponseOutputText(
820-
text=text,
820+
text=text_content["text"],
821821
annotations=[],
822822
type="output_text",
823823
logprobs=None, # Not available from parser

0 commit comments

Comments
 (0)