From 21472a1a05d7752fa85b250a87e781ce775c047e Mon Sep 17 00:00:00 2001 From: Brian Yu Date: Wed, 15 Oct 2025 15:59:17 -0700 Subject: [PATCH 01/21] try print request info Signed-off-by: Brian Yu --- nemo_gym/server_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nemo_gym/server_utils.py b/nemo_gym/server_utils.py index 7760bb32e..7f9e2ac97 100644 --- a/nemo_gym/server_utils.py +++ b/nemo_gym/server_utils.py @@ -150,7 +150,8 @@ async def request( async def raise_for_status(response: ClientResponse) -> None: # pragma: no cover if not response.ok: content = await response.content.read() - print(content) + print(f"""Request info: {response.request_info} +Response content: {content}""") response.raise_for_status() From f30134fc0569d4431f7d6261c76a5d51aa3bdc18 Mon Sep 17 00:00:00 2001 From: Brian Yu Date: Wed, 15 Oct 2025 17:05:59 -0700 Subject: [PATCH 02/21] try print request kwargs Signed-off-by: Brian Yu --- nemo_gym/openai_utils.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/nemo_gym/openai_utils.py b/nemo_gym/openai_utils.py index 49a4b53ed..74af8b1e7 100644 --- a/nemo_gym/openai_utils.py +++ b/nemo_gym/openai_utils.py @@ -13,6 +13,7 @@ # limitations under the License. 
from asyncio import sleep from typing import ( + Any, Dict, List, Literal, @@ -442,33 +443,42 @@ async def _request(self, **request_kwargs: Dict) -> ClientResponse: else: return response + async def _raise_for_status(self, response: ClientResponse, request_kwargs: Dict[str, Any]) -> None: + if not response.ok: + print(f"Request kwargs: {request_kwargs}") + + await raise_for_status(response) + async def create_chat_completion(self, **kwargs): - response = await self._request( - method="POST", + request_kwargs = dict( url=f"{self.base_url}/chat/completions", json=kwargs, headers={"Authorization": f"Bearer {self.api_key}"}, ) - await raise_for_status(response) + response = await self._request(method="POST", **request_kwargs) + + await self._raise_for_status(response, request_kwargs) return await response.json() async def create_response(self, **kwargs): - response = await self._request( - method="POST", + request_kwargs = dict( url=f"{self.base_url}/responses", json=kwargs, headers={"Authorization": f"Bearer {self.api_key}"}, ) - await raise_for_status(response) + response = await self._request(method="POST", **request_kwargs) + + await self._raise_for_status(response, request_kwargs) return await response.json() async def create_tokenize(self, **kwargs): base_url = self.base_url.removesuffix("/v1") - response = await self._request( - method="POST", + request_kwargs = dict( url=f"{base_url}/tokenize", json=kwargs, headers={"Authorization": f"Bearer {self.api_key}"}, ) - await raise_for_status(response) + response = await self._request(method="POST", **request_kwargs) + + await self._raise_for_status(response, request_kwargs) return await response.json() From e1bbb384b8a905996ac65f2825f577ef91b51117 Mon Sep 17 00:00:00 2001 From: Brian Yu Date: Wed, 15 Oct 2025 17:06:32 -0700 Subject: [PATCH 03/21] print json Signed-off-by: Brian Yu --- nemo_gym/openai_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nemo_gym/openai_utils.py 
b/nemo_gym/openai_utils.py index 74af8b1e7..7fe9e67a6 100644 --- a/nemo_gym/openai_utils.py +++ b/nemo_gym/openai_utils.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import json from asyncio import sleep from typing import ( Any, @@ -445,7 +446,7 @@ async def _request(self, **request_kwargs: Dict) -> ClientResponse: async def _raise_for_status(self, response: ClientResponse, request_kwargs: Dict[str, Any]) -> None: if not response.ok: - print(f"Request kwargs: {request_kwargs}") + print(f"Request kwargs: {json.dumps(request_kwargs)}") await raise_for_status(response) From 49405ff39f708de2f167b72798b036dc69932fe1 Mon Sep 17 00:00:00 2001 From: Brian Yu Date: Wed, 15 Oct 2025 17:10:17 -0700 Subject: [PATCH 04/21] raise for status in run examples Signed-off-by: Brian Yu --- nemo_gym/rollout_collection.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nemo_gym/rollout_collection.py b/nemo_gym/rollout_collection.py index 0969a993a..bdf0f2ebf 100644 --- a/nemo_gym/rollout_collection.py +++ b/nemo_gym/rollout_collection.py @@ -79,6 +79,7 @@ async def _post_coroutine(row: dict) -> None: row["responses_create_params"] = row["responses_create_params"] | config.responses_create_params async with semaphore: response = await server_client.post(server_name=config.agent_name, url_path="/run", json=row) + response.raise_for_status() result = await response.json() f.write(json.dumps(result) + "\n") metrics.update({k: v for k, v in result.items() if isinstance(v, (int, float))}) @@ -96,6 +97,7 @@ async def run_examples( async def _post_subroutine(row: Dict) -> Dict: res = await server_client.post(server_name=row.pop("agent_ref")["name"], url_path="/run", json=row) + res.raise_for_status() return await res.json() return await tqdm.gather(*map(_post_subroutine, examples), desc="Collecting rollouts", miniters=10) From 
b39ecc487559c158a87bae1296020f07ccb760d6 Mon Sep 17 00:00:00 2001 From: Brian Yu Date: Wed, 15 Oct 2025 17:36:04 -0700 Subject: [PATCH 05/21] dont skip special tokens Signed-off-by: Brian Yu --- responses_api_models/vllm_model/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/responses_api_models/vllm_model/app.py b/responses_api_models/vllm_model/app.py index 96dbc550f..2314dbd84 100644 --- a/responses_api_models/vllm_model/app.py +++ b/responses_api_models/vllm_model/app.py @@ -147,7 +147,7 @@ async def chat_completions( create_params = body_dict # Always disable skip_special_tokens to preserve tags for reasoning parsing - create_params |= dict(skip_special_tokens=False) + # create_params |= dict(skip_special_tokens=False) if self.config.return_token_id_information: create_params |= dict( From ff8e3a178c6fdc05449e838e7257d59280958221 Mon Sep 17 00:00:00 2001 From: Brian Yu Date: Wed, 15 Oct 2025 17:59:12 -0700 Subject: [PATCH 06/21] don't raise for status Signed-off-by: Brian Yu --- nemo_gym/rollout_collection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo_gym/rollout_collection.py b/nemo_gym/rollout_collection.py index bdf0f2ebf..93767aa04 100644 --- a/nemo_gym/rollout_collection.py +++ b/nemo_gym/rollout_collection.py @@ -97,7 +97,7 @@ async def run_examples( async def _post_subroutine(row: Dict) -> Dict: res = await server_client.post(server_name=row.pop("agent_ref")["name"], url_path="/run", json=row) - res.raise_for_status() + # res.raise_for_status() return await res.json() return await tqdm.gather(*map(_post_subroutine, examples), desc="Collecting rollouts", miniters=10) From 2042edeea989a625954da153de980b7f61c8d3aa Mon Sep 17 00:00:00 2001 From: Brian Yu Date: Wed, 15 Oct 2025 18:11:16 -0700 Subject: [PATCH 07/21] raise again Signed-off-by: Brian Yu --- nemo_gym/rollout_collection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo_gym/rollout_collection.py 
b/nemo_gym/rollout_collection.py index 93767aa04..bdf0f2ebf 100644 --- a/nemo_gym/rollout_collection.py +++ b/nemo_gym/rollout_collection.py @@ -97,7 +97,7 @@ async def run_examples( async def _post_subroutine(row: Dict) -> Dict: res = await server_client.post(server_name=row.pop("agent_ref")["name"], url_path="/run", json=row) - # res.raise_for_status() + res.raise_for_status() return await res.json() return await tqdm.gather(*map(_post_subroutine, examples), desc="Collecting rollouts", miniters=10) From 41a7de07d055f91e3313e5fa3d6cdb84e8f8d00a Mon Sep 17 00:00:00 2001 From: Brian Yu Date: Wed, 15 Oct 2025 18:25:52 -0700 Subject: [PATCH 08/21] use mininterval Signed-off-by: Brian Yu --- nemo_gym/rollout_collection.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nemo_gym/rollout_collection.py b/nemo_gym/rollout_collection.py index bdf0f2ebf..52b72cba7 100644 --- a/nemo_gym/rollout_collection.py +++ b/nemo_gym/rollout_collection.py @@ -100,7 +100,9 @@ async def _post_subroutine(row: Dict) -> Dict: res.raise_for_status() return await res.json() - return await tqdm.gather(*map(_post_subroutine, examples), desc="Collecting rollouts", miniters=10) + return await tqdm.gather( + *map(_post_subroutine, examples), desc="Collecting rollouts", miniters=10, mininterval=10 + ) def setup_server_client(self, head_server_config: Optional[BaseServerConfig] = None) -> ServerClient: server_client = ServerClient.load_from_global_config(head_server_config) From a159d6dc9793b804658a3f94529530d23cb4daa0 Mon Sep 17 00:00:00 2001 From: Brian Yu Date: Wed, 15 Oct 2025 18:33:11 -0700 Subject: [PATCH 09/21] use maxinterval Signed-off-by: Brian Yu --- nemo_gym/rollout_collection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo_gym/rollout_collection.py b/nemo_gym/rollout_collection.py index 52b72cba7..bcdf073bb 100644 --- a/nemo_gym/rollout_collection.py +++ b/nemo_gym/rollout_collection.py @@ -101,7 +101,7 @@ async def _post_subroutine(row: 
Dict) -> Dict: return await res.json() return await tqdm.gather( - *map(_post_subroutine, examples), desc="Collecting rollouts", miniters=10, mininterval=10 + *map(_post_subroutine, examples), desc="Collecting rollouts", miniters=10, maxinterval=10 ) def setup_server_client(self, head_server_config: Optional[BaseServerConfig] = None) -> ServerClient: From f78994fbed125e4fab926be9ad6feb3b3704c0d5 Mon Sep 17 00:00:00 2001 From: Brian Yu Date: Wed, 15 Oct 2025 18:38:15 -0700 Subject: [PATCH 10/21] revert max interval Signed-off-by: Brian Yu --- nemo_gym/rollout_collection.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/nemo_gym/rollout_collection.py b/nemo_gym/rollout_collection.py index bcdf073bb..bdf0f2ebf 100644 --- a/nemo_gym/rollout_collection.py +++ b/nemo_gym/rollout_collection.py @@ -100,9 +100,7 @@ async def _post_subroutine(row: Dict) -> Dict: res.raise_for_status() return await res.json() - return await tqdm.gather( - *map(_post_subroutine, examples), desc="Collecting rollouts", miniters=10, maxinterval=10 - ) + return await tqdm.gather(*map(_post_subroutine, examples), desc="Collecting rollouts", miniters=10) def setup_server_client(self, head_server_config: Optional[BaseServerConfig] = None) -> ServerClient: server_client = ServerClient.load_from_global_config(head_server_config) From 5701fb2adbf87ada48ab264ad37611177bd558a5 Mon Sep 17 00:00:00 2001 From: Brian Yu Date: Wed, 15 Oct 2025 18:57:19 -0700 Subject: [PATCH 11/21] print inside chat completion Signed-off-by: Brian Yu --- nemo_gym/openai_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nemo_gym/openai_utils.py b/nemo_gym/openai_utils.py index 7fe9e67a6..2e63a993a 100644 --- a/nemo_gym/openai_utils.py +++ b/nemo_gym/openai_utils.py @@ -456,6 +456,8 @@ async def create_chat_completion(self, **kwargs): json=kwargs, headers={"Authorization": f"Bearer {self.api_key}"}, ) + # TODO Remove + print(f"Hit chat completion with {json.dumps(request_kwargs)}") response = 
await self._request(method="POST", **request_kwargs) await self._raise_for_status(response, request_kwargs) From 8cd54f783b76883337c587d958cce4b0f3aa6413 Mon Sep 17 00:00:00 2001 From: Brian Yu Date: Wed, 15 Oct 2025 19:04:35 -0700 Subject: [PATCH 12/21] just print hit Signed-off-by: Brian Yu --- nemo_gym/openai_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nemo_gym/openai_utils.py b/nemo_gym/openai_utils.py index 2e63a993a..a6eb20e75 100644 --- a/nemo_gym/openai_utils.py +++ b/nemo_gym/openai_utils.py @@ -457,8 +457,9 @@ async def create_chat_completion(self, **kwargs): headers={"Authorization": f"Bearer {self.api_key}"}, ) # TODO Remove - print(f"Hit chat completion with {json.dumps(request_kwargs)}") + print("Hit chat completion") response = await self._request(method="POST", **request_kwargs) + print("Got response status", response.status) await self._raise_for_status(response, request_kwargs) return await response.json() From 4b3d0f7f8b534153b5166964608ee5b010be527a Mon Sep 17 00:00:00 2001 From: Brian Yu Date: Wed, 15 Oct 2025 19:08:59 -0700 Subject: [PATCH 13/21] remove Signed-off-by: Brian Yu --- nemo_gym/openai_utils.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/nemo_gym/openai_utils.py b/nemo_gym/openai_utils.py index a6eb20e75..7fe9e67a6 100644 --- a/nemo_gym/openai_utils.py +++ b/nemo_gym/openai_utils.py @@ -456,10 +456,7 @@ async def create_chat_completion(self, **kwargs): json=kwargs, headers={"Authorization": f"Bearer {self.api_key}"}, ) - # TODO Remove - print("Hit chat completion") response = await self._request(method="POST", **request_kwargs) - print("Got response status", response.status) await self._raise_for_status(response, request_kwargs) return await response.json() From c08b5644783d77146a3aa102caf8ae6e096930b7 Mon Sep 17 00:00:00 2001 From: Brian Yu Date: Wed, 15 Oct 2025 19:10:47 -0700 Subject: [PATCH 14/21] print chat completion response Signed-off-by: Brian Yu --- 
responses_api_models/vllm_model/app.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/responses_api_models/vllm_model/app.py b/responses_api_models/vllm_model/app.py index 2314dbd84..529753198 100644 --- a/responses_api_models/vllm_model/app.py +++ b/responses_api_models/vllm_model/app.py @@ -98,6 +98,8 @@ async def responses( # Chat Completion Create Params -> Chat Completion chat_completion_response = await self.chat_completions(request, chat_completion_create_params) + # TODO remove + print(chat_completion_response) choice = chat_completion_response.choices[0] From 62366aff474862c2b4e4503935b554e8ee86cee4 Mon Sep 17 00:00:00 2001 From: Brian Yu Date: Wed, 15 Oct 2025 19:21:33 -0700 Subject: [PATCH 15/21] skip special tokens false Signed-off-by: Brian Yu --- responses_api_models/vllm_model/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/responses_api_models/vllm_model/app.py b/responses_api_models/vllm_model/app.py index 529753198..e5c5a4372 100644 --- a/responses_api_models/vllm_model/app.py +++ b/responses_api_models/vllm_model/app.py @@ -149,7 +149,7 @@ async def chat_completions( create_params = body_dict # Always disable skip_special_tokens to preserve tags for reasoning parsing - # create_params |= dict(skip_special_tokens=False) + create_params |= dict(skip_special_tokens=False) if self.config.return_token_id_information: create_params |= dict( From 6ee32437c9e120223cd5a967421f3b4604ea612c Mon Sep 17 00:00:00 2001 From: Brian Yu Date: Wed, 15 Oct 2025 19:24:30 -0700 Subject: [PATCH 16/21] print raw dict Signed-off-by: Brian Yu --- responses_api_models/vllm_model/app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/responses_api_models/vllm_model/app.py b/responses_api_models/vllm_model/app.py index e5c5a4372..9ae715ce2 100644 --- a/responses_api_models/vllm_model/app.py +++ b/responses_api_models/vllm_model/app.py @@ -98,8 +98,6 @@ async def responses( # Chat Completion Create Params -> Chat Completion 
chat_completion_response = await self.chat_completions(request, chat_completion_create_params) - # TODO remove - print(chat_completion_response) choice = chat_completion_response.choices[0] @@ -195,6 +193,8 @@ async def chat_completions( raise NotImplementedError chat_completion_dict = await client.create_chat_completion(**create_params) + # TODO remove + print("Chat completion raw dict", chat_completion_dict) choice_dict = chat_completion_dict["choices"][0] if self.config.uses_reasoning_parser: From f94d3ec74a7feecb222ad64942a13d76f3b2bbb5 Mon Sep 17 00:00:00 2001 From: Brian Yu Date: Wed, 15 Oct 2025 19:49:38 -0700 Subject: [PATCH 17/21] remove print Signed-off-by: Brian Yu --- responses_api_models/vllm_model/app.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/responses_api_models/vllm_model/app.py b/responses_api_models/vllm_model/app.py index 9ae715ce2..96dbc550f 100644 --- a/responses_api_models/vllm_model/app.py +++ b/responses_api_models/vllm_model/app.py @@ -193,8 +193,6 @@ async def chat_completions( raise NotImplementedError chat_completion_dict = await client.create_chat_completion(**create_params) - # TODO remove - print("Chat completion raw dict", chat_completion_dict) choice_dict = chat_completion_dict["choices"][0] if self.config.uses_reasoning_parser: From 59fe57ffe0b6d46f6e422ebd5e1e6ece67659d7f Mon Sep 17 00:00:00 2001 From: Brian Yu Date: Wed, 15 Oct 2025 19:52:58 -0700 Subject: [PATCH 18/21] raise for status on error Signed-off-by: Brian Yu --- nemo_gym/openai_utils.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/nemo_gym/openai_utils.py b/nemo_gym/openai_utils.py index 7fe9e67a6..476b9be34 100644 --- a/nemo_gym/openai_utils.py +++ b/nemo_gym/openai_utils.py @@ -77,7 +77,7 @@ from pydantic import BaseModel, ConfigDict, Field from typing_extensions import TypedDict -from nemo_gym.server_utils import ClientResponse, raise_for_status, request +from nemo_gym.server_utils import MAX_NUM_TRIES, ClientResponse, 
raise_for_status, request ######################################## @@ -430,7 +430,7 @@ class NeMoGymAsyncOpenAI(BaseModel): async def _request(self, **request_kwargs: Dict) -> ClientResponse: tries = 0 - while True: + while tries < MAX_NUM_TRIES: tries += 1 response = await request(**request_kwargs) # See https://platform.openai.com/docs/guides/error-codes/api-errors @@ -444,6 +444,9 @@ async def _request(self, **request_kwargs: Dict) -> ClientResponse: else: return response + # We've exited the loop + response.raise_for_status() + async def _raise_for_status(self, response: ClientResponse, request_kwargs: Dict[str, Any]) -> None: if not response.ok: print(f"Request kwargs: {json.dumps(request_kwargs)}") From ebeb3a3b0cd435b4875a3109d26e4e1f0915875a Mon Sep 17 00:00:00 2001 From: Brian Yu Date: Wed, 15 Oct 2025 21:29:50 -0700 Subject: [PATCH 19/21] fix test Signed-off-by: Brian Yu --- .../vllm_model/tests/test_app.py | 135 +++++++++++++++++- 1 file changed, 134 insertions(+), 1 deletion(-) diff --git a/responses_api_models/vllm_model/tests/test_app.py b/responses_api_models/vllm_model/tests/test_app.py index cbb1b6926..283b20ab3 100644 --- a/responses_api_models/vllm_model/tests/test_app.py +++ b/responses_api_models/vllm_model/tests/test_app.py @@ -1595,6 +1595,7 @@ def test_responses_reasoning_parser(self, monkeypatch: MonkeyPatch): app = server.setup_webserver() client = TestClient(app) + # START: First turn mock_chat_completion = NeMoGymChatCompletion( id="chtcmpl-123", object="chat.completion", @@ -1806,8 +1807,19 @@ def test_responses_reasoning_parser(self, monkeypatch: MonkeyPatch): actual_messages = mock_method.call_args.kwargs["messages"] assert expected_messages == actual_messages + # START: Second turn + input_messages = [ + *input_messages, + *data["output"], + NeMoGymEasyInputMessage( + type="message", + role="user", + content=[NeMoGymResponseInputText(text="user", type="input_text")], + status="completed", + ), + ] request_body = 
NeMoGymResponseCreateParamsNonStreaming( - input=input_messages + data["output"], + input=input_messages, tools=input_tools, ) @@ -1901,6 +1913,127 @@ def test_responses_reasoning_parser(self, monkeypatch: MonkeyPatch): ], "reasoning_content": "Gathering order status and delivery info...", }, + {"content": [{"text": "user", "type": "text"}], "role": "user"}, + ] + actual_messages = mock_method.call_args.kwargs["messages"] + assert expected_messages == actual_messages + + # START: Third turn + input_messages = [ + *input_messages, + *data["output"], + NeMoGymEasyInputMessage( + type="message", + role="user", + content=[NeMoGymResponseInputText(text="user", type="input_text")], + status="completed", + ), + ] + request_body = NeMoGymResponseCreateParamsNonStreaming( + input=input_messages, + tools=input_tools, + ) + + mock_chat_completion = NeMoGymChatCompletion( + id="chtcmpl-123", + object="chat.completion", + created=FIXED_TIME, + model="dummy_model", + choices=[ + NeMoGymChoice( + index=0, + finish_reason="tool_calls", + message=NeMoGymChatCompletionMessage( + role="assistant", + # Test the None path ehre + content="None reasoning test", + tool_calls=[], + reasoning_content=None, + ), + ) + ], + ) + mock_method = AsyncMock(return_value=mock_chat_completion.model_dump()) + monkeypatch.setattr( + server._clients[0].__class__, + "create_chat_completion", + mock_method, + ) + + response = client.post( + "/v1/responses", + json=request_body.model_dump(exclude_unset=True, mode="json"), + ) + assert response.status_code == 200 + + data = response.json() + + expected_response = NeMoGymResponse( + **COMMON_RESPONSE_PARAMS, + id="resp_123", + object="response", + tools=input_tools, + created_at=FIXED_TIME, + model="dummy_model", + output=[ + NeMoGymResponseOutputMessage( + id="msg_123", + status="completed", + type="message", + content=[ + NeMoGymResponseOutputText( + type="output_text", + text="None reasoning test", + annotations=[], + logprobs=None, + ) + ], + ), + ], + ) 
+ expected_dict = expected_response.model_dump() + assert data == expected_dict + + expected_messages = [ + {"content": [{"text": "Check my order status", "type": "text"}], "role": "user"}, + { + "role": "assistant", + "content": "Sure, one sec.", + "tool_calls": [], + "reasoning_content": "First reasoning item", + }, + {"content": [{"text": "cool", "type": "text"}], "role": "user"}, + { + "role": "assistant", + "content": "I'm still checking", + "tool_calls": [], + }, + {"content": [{"text": "ok", "type": "text"}], "role": "user"}, + { + "role": "assistant", + "content": " hello hello", + "tool_calls": [ + { + "id": "call_123", + "function": {"arguments": '{"order_id": "123"}', "name": "get_order_status"}, + "type": "function", + }, + { + "id": "call_234", + "function": {"arguments": '{"order_id": "234"}', "name": "get_delivery_date"}, + "type": "function", + }, + ], + "reasoning_content": "Gathering order status and delivery info...", + }, + {"content": [{"text": "user", "type": "text"}], "role": "user"}, + { + "role": "assistant", + "content": "", + "tool_calls": [], + "reasoning_content": "None content test", + }, + {"content": [{"text": "user", "type": "text"}], "role": "user"}, ] actual_messages = mock_method.call_args.kwargs["messages"] assert expected_messages == actual_messages From c8a262ab3ee0ac01773d7c8b5733f81c8c4babcb Mon Sep 17 00:00:00 2001 From: Brian Yu Date: Wed, 15 Oct 2025 21:45:16 -0700 Subject: [PATCH 20/21] prints Signed-off-by: Brian Yu --- responses_api_models/vllm_model/app.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/responses_api_models/vllm_model/app.py b/responses_api_models/vllm_model/app.py index 96dbc550f..83e1fe1df 100644 --- a/responses_api_models/vllm_model/app.py +++ b/responses_api_models/vllm_model/app.py @@ -98,6 +98,8 @@ async def responses( # Chat Completion Create Params -> Chat Completion chat_completion_response = await self.chat_completions(request, chat_completion_create_params) + # TODO remove + 
print(f"Chat completion response: {chat_completion_response.model_dump_json()}") choice = chat_completion_response.choices[0] @@ -193,6 +195,10 @@ async def chat_completions( raise NotImplementedError chat_completion_dict = await client.create_chat_completion(**create_params) + # TODO remove + import json + + print(f"Raw response: {json.dumps(chat_completion_dict)}") choice_dict = chat_completion_dict["choices"][0] if self.config.uses_reasoning_parser: From 86ccca96a65aa29782d28245afa1a2f1dedfa876 Mon Sep 17 00:00:00 2001 From: Brian Yu Date: Wed, 15 Oct 2025 21:49:39 -0700 Subject: [PATCH 21/21] clean print Signed-off-by: Brian Yu --- responses_api_models/vllm_model/app.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/responses_api_models/vllm_model/app.py b/responses_api_models/vllm_model/app.py index 83e1fe1df..96dbc550f 100644 --- a/responses_api_models/vllm_model/app.py +++ b/responses_api_models/vllm_model/app.py @@ -98,8 +98,6 @@ async def responses( # Chat Completion Create Params -> Chat Completion chat_completion_response = await self.chat_completions(request, chat_completion_create_params) - # TODO remove - print(f"Chat completion response: {chat_completion_response.model_dump_json()}") choice = chat_completion_response.choices[0] @@ -195,10 +193,6 @@ async def chat_completions( raise NotImplementedError chat_completion_dict = await client.create_chat_completion(**create_params) - # TODO remove - import json - - print(f"Raw response: {json.dumps(chat_completion_dict)}") choice_dict = chat_completion_dict["choices"][0] if self.config.uses_reasoning_parser: