Commit 69022c4

Author: Andrew Xia (committed)

working mcp

Signed-off-by: Andrew Xia <[email protected]>

1 parent: aa4ecbc

File tree: 4 files changed (+142, -17 lines)

vllm/entrypoints/context.py

Lines changed: 28 additions & 5 deletions

@@ -15,7 +15,10 @@
 from openai_harmony import Author, Message, Role, StreamState, TextContent

 from vllm import envs
-from vllm.entrypoints.chat_utils import CustomChatCompletionMessageParam
+from vllm.entrypoints.chat_utils import (
+    ChatTemplateContentFormatOption,
+    CustomChatCompletionMessageParam,
+)
 from vllm.entrypoints.harmony_utils import (
     get_encoding,
     get_streamable_parser_for_assistant,
@@ -193,6 +196,9 @@ def __init__(
         request: ResponsesRequest,
         available_tools: list[str] | None,
         tool_parser_cls,
+        chat_template: str | None,
+        chat_template_content_format: ChatTemplateContentFormatOption,
+        tool_dicts: list[dict] | None = None,
     ):
         self.last_output = None
         self.num_prompt_tokens = 0
@@ -210,6 +216,8 @@ def __init__(
             request=request,
             tool_parser_cls=tool_parser_cls,
         )
+        self.tool_parser_cls = tool_parser_cls
+        self.request = request
         self.tokenizer = tokenizer
         self.reasoning_parser = reasoning_parser

@@ -220,6 +228,10 @@ def __init__(
         self._tool_sessions: dict[str, ClientSession | Tool] = {}
         self.called_tools: set[str] = set()

+        self.chat_template = chat_template
+        self.chat_template_content_format = chat_template_content_format
+        self.tool_dicts = tool_dicts
+
     def append_output(
         self, output: RequestOutput | list[CustomChatCompletionMessageParam]
     ) -> None:
@@ -252,8 +264,9 @@ async def call_python_tool(
         self.called_tools.add("python")
         if isinstance(tool_session, Tool):
             return await tool_session.get_result(self)
+        args = json.loads(last_msg.arguments)
         param = {
-            "code": last_msg.arguments,
+            "code": args['code'],
         }
         result = await tool_session.call_tool("python", param)
         result_str = result.content[0].text
@@ -263,7 +276,9 @@

         message = CustomChatCompletionMessageParam(
             role="tool",
-            content=[ChatCompletionContentPartTextParam(text=content, type="text")],
+            content=[
+                ChatCompletionContentPartTextParam(text=content, type="text")
+            ],  # TODO: why is this nested?
         )

         return [message]
@@ -281,8 +296,16 @@ async def call_tool(self) -> list[CustomChatCompletionMessageParam]:
         # if recipient is not None and recipient.startswith("python"):
         #     return await self.call_python_tool(self._tool_sessions["python"], last_tool_request)

-    def render_for_completion(self) -> list[int]:
-        raise NotImplementedError("Should not be called.")
+    def render_for_completion(self):
+        return [
+            self.request,
+            self.tokenizer,
+            self.parser.chat_completion_messages,
+            self.tool_dicts,
+            self.tool_parser_cls,
+            self.chat_template,
+            self.chat_template_content_format,
+        ]

     async def init_tool_sessions(
         self,
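
Two behavioral changes in this file are easy to miss. First, call_python_tool now parses the model's tool arguments as a JSON object and extracts its "code" field, where it previously forwarded the raw arguments string as the code itself. A minimal sketch of the new argument handling, with a hypothetical arguments payload (the json import is assumed to already exist in context.py):

import json

# Hypothetical arguments string as a model would emit it for the MCP python
# tool: a JSON object wrapping the source, not bare code.
last_msg_arguments = '{"code": "print(1 + 1)"}'

args = json.loads(last_msg_arguments)
param = {"code": args["code"]}  # what tool_session.call_tool("python", param) now receives

Second, render_for_completion() no longer raises NotImplementedError: it packs the request, tokenizer, parsed messages, tool dicts, tool parser class, and chat-template settings into a list that the serving layer unpacks positionally (see the serving_engine.py hunks below).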

vllm/entrypoints/openai/parser/parser.py

Lines changed: 6 additions & 6 deletions

@@ -5,9 +5,6 @@
 from openai.types.chat.chat_completion_content_part_text_param import (
     ChatCompletionContentPartTextParam,
 )
-from openai.types.responses.response_reasoning_item import (
-    Content as ResponseReasoningTextContent,
-)

 from vllm.entrypoints.chat_utils import CustomChatCompletionMessageParam
 from vllm.entrypoints.openai.protocol import FunctionCall, ResponsesRequest
@@ -63,7 +60,7 @@ def process(self, output: CompletionOutput) -> "StreamableParser":
             )
         )
         if reasoning_content:
-            new_content = ResponseReasoningTextContent(
+            new_content = ChatCompletionContentPartTextParam(
                 text=reasoning_content, type="reasoning_text"
             )

@@ -77,6 +74,7 @@ def process(self, output: CompletionOutput) -> "StreamableParser":
         if tool_call_info is not None and tool_call_info.tools_called:
             # extract_tool_calls() returns a list of tool calls.
             function_calls.extend(
+                # TODO: this should be a TypedDict
                 FunctionCall(
                     name=tool_call.function.name,
                     arguments=tool_call.function.arguments,
@@ -92,8 +90,10 @@ def process(self, output: CompletionOutput) -> "StreamableParser":
             self.current_chat_completion_message["content"].extend(function_calls)

         self.chat_completion_messages.append(self.current_chat_completion_message)
-        # if len(function_calls) > 0:
-        # TODO: add a tool call to the parser
+
+        self.current_chat_completion_message = CustomChatCompletionMessageParam(
+            role=self.current_role, content=[]
+        )

         return self
vllm/entrypoints/openai/serving_engine.py

Lines changed: 81 additions & 4 deletions

@@ -1179,6 +1179,61 @@ async def _process_inputs(
         )
         return engine_request, tokenization_kwargs

+    async def _render_next_turn(
+        self,
+        request,
+        tokenizer,
+        messages,
+        tool_dicts,
+        tool_parser,
+        chat_template,
+        chat_template_content_format,
+    ):
+        new_messages = []
+        for item in messages:
+            if item["role"] == "user" or item["role"] == "tool":
+                new_messages.append(item)
+            elif item["role"] == "assistant":
+                for content in item["content"]:
+                    if isinstance(content, FunctionCall):
+                        new_msg = {
+                            "role": "assistant",
+                            "tool_calls": [
+                                {
+                                    "id": "dafsdfdsa",
+                                    "type": "function",
+                                    "function": {
+                                        "name": content.name,
+                                        "arguments": content.arguments,
+                                    },
+                                }
+                            ],
+                        }
+                        new_messages.append(new_msg)
+                    elif content["type"] == "text":
+                        new_messages.append(
+                            {"role": "assistant", "content": content["text"]}
+                        )
+                    elif content["type"] == "reasoning_text":
+                        reasoning_content = content["text"]
+                        new_messages.append(
+                            {
+                                "role": "assistant",
+                                "content": "<think>" + reasoning_content + "</think>",
+                            }
+                        )
+
+        _, request_prompts, engine_prompts = await self._preprocess_chat(
+            request,
+            tokenizer,
+            new_messages,
+            tool_dicts=tool_dicts,
+            tool_parser=tool_parser,
+            chat_template=chat_template,
+            chat_template_content_format=chat_template_content_format,
+        )
+        return request_prompts, engine_prompts
+
     async def _generate_with_builtin_tools(
         self,
         request_id: str,
@@ -1238,11 +1293,33 @@ async def _generate_with_builtin_tools(

             # Create inputs for the next turn.
             # Render the next prompt token ids.
-            prompt_token_ids = context.render_for_completion()
-            engine_prompt = EngineTokensPrompt(prompt_token_ids=prompt_token_ids)
-            request_prompt = prompt_token_ids
+            [
+                request,
+                tokenizer,
+                messages,
+                tool_dicts,
+                tool_parser,
+                chat_template,
+                chat_template_content_format,
+            ] = context.render_for_completion()
+
+            # HACK
+            request_prompts, engine_prompts = await self._render_next_turn(
+                request,
+                tokenizer,
+                messages,
+                tool_dicts,
+                tool_parser,
+                chat_template,
+                chat_template_content_format,
+            )
+            engine_prompt = engine_prompts[0]
+            request_prompt = request_prompts[0]
+
+            # engine_prompt = EngineTokensPrompt(prompt_token_ids=prompt_token_ids)
+            # request_prompt = prompt_token_ids
             # Update the sampling params.
-            sampling_params.max_tokens = self.max_model_len - len(prompt_token_ids)
+            sampling_params.max_tokens = self.max_model_len - len(engine_prompt)
             # OPTIMIZATION
             priority = orig_priority - 1
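
The new _render_next_turn flattens the messages accumulated by the parser back into standard chat-completion messages before re-rendering the prompt via _preprocess_chat: FunctionCall parts become assistant tool_calls entries (with the hard-coded placeholder id from the diff), text parts become plain assistant messages, and reasoning parts are re-wrapped in <think> tags. A sketch of that transformation on invented inputs:

from vllm.entrypoints.openai.protocol import FunctionCall

# Invented parser output for one turn: a user message plus an assistant
# message whose content mixes a reasoning part and a tool call.
parsed = [
    {"role": "user", "content": "What is 2 + 2?"},
    {
        "role": "assistant",
        "content": [
            {"type": "reasoning_text", "text": "I should run this in python."},
            FunctionCall(name="python", arguments='{"code": "print(2 + 2)"}'),
        ],
    },
]

# What the branches in _render_next_turn would produce, part by part:
rendered = [
    {"role": "user", "content": "What is 2 + 2?"},
    {"role": "assistant", "content": "<think>I should run this in python.</think>"},
    {
        "role": "assistant",
        "tool_calls": [
            {
                "id": "dafsdfdsa",  # placeholder id hard-coded in the diff
                "type": "function",
                "function": {
                    "name": "python",
                    "arguments": '{"code": "print(2 + 2)"}',
                },
            }
        ],
    },
]

Note the retained comment still says "Render the next prompt token ids", but the turn is now re-rendered from messages through the chat template rather than from harmony token ids.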

vllm/entrypoints/openai/serving_responses.py

Lines changed: 27 additions & 2 deletions

@@ -390,6 +390,14 @@ async def create_responses(
                 request=request,
                 tool_parser_cls=self.tool_parser,
                 available_tools=available_tools,
+                tool_dicts=[
+                    convert_tool_responses_to_completions_format(
+                        tool.model_dump()
+                    )
+                    for tool in request.tools
+                ],
+                chat_template=self.chat_template,
+                chat_template_content_format=self.chat_template_content_format,
             )
         else:
             context = SimpleContext()
@@ -804,6 +812,16 @@ def _make_response_output_items_from_parsable_context(
         output_items: list[ResponseOutputItem] = []

         for sentence in chat_completion_messages:
+            # if sentence['role'] == 'tool':
+            # TODO: this should be a McpCall type
+            # function_call_output = ResponseFunctionToolCallOutputItem(
+            #     id=f"fc_{random_uuid()}",
+            #     call_id=f"call_{random_uuid()}",
+            #     type="function_call_output",
+            #     status="completed",
+            #     output=sentence['content'][0]['text'].text,
+            # )
+            # output_items.append(function_call_output)
             if sentence["role"] != "assistant":
                 # This could be a system/user message, and
                 # This is a message from a tool to the assistant (e.g., search result).
@@ -812,13 +830,20 @@
                 continue

             for content in sentence["content"]:
-                if isinstance(content, ResponseReasoningTextContent):
+                if (
+                    isinstance(content, dict)
+                    and content.get("type") == "reasoning_text"
+                ):
                     # Reasoning content
                     reasoning_item = ResponseReasoningItem(
                         id=f"rs_{random_uuid()}",
                         summary=[],
                         type="reasoning",
-                        content=[content],
+                        content=[
+                            ResponseReasoningTextContent(
+                                text=content["text"], type="reasoning_text"
+                            )
+                        ],
                         status="completed",
                     )
                     output_items.append(reasoning_item)
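
Since the parser now stores reasoning parts as plain dicts, this function detects them by shape (a dict whose "type" is "reasoning_text") and rebuilds the typed Responses-API object only at the output boundary. A sketch of just that conversion, with an invented text value (ResponseReasoningTextContent is the Content alias from openai.types.responses.response_reasoning_item, the same import parser.py dropped):

from openai.types.responses.response_reasoning_item import (
    Content as ResponseReasoningTextContent,
)

# Invented reasoning part in the parser's dict form.
content = {"type": "reasoning_text", "text": "First, check the units..."}

# Rebuild the typed content object for the ResponseReasoningItem payload.
part = ResponseReasoningTextContent(text=content["text"], type="reasoning_text")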
