@@ -93,17 +93,35 @@ def _update_num_prompt_tokens(self, output: RequestOutput):
             # as new prompt each time. Hence the sum.
             self.num_prompt_tokens += len(output.prompt_token_ids)
 
+    def _update_num_cached_tokens(self, output: RequestOutput):
+        if output.num_cached_tokens is not None:
+            # Similar to num_prompt_tokens
+            self.num_cached_tokens += output.num_cached_tokens
+
     def _update_num_output_tokens(self, token_ids: Sequence[int]):
         self.num_output_tokens += len(token_ids)
 
+    def _update_num_reasoning_tokens(self, token_ids: Sequence[int]):
+        # Count tokens that are part of reasoning content (analysis channel
+        # or tool-directed messages like python/browser calls)
+        is_analysis = self.parser.current_channel == "analysis"
+        is_tool_call = (self.parser.current_recipient is not None and
+                        (self.parser.current_recipient.startswith("python") or
+                         self.parser.current_recipient.startswith("browser.")))
+        if is_analysis or is_tool_call:
+            self.num_reasoning_tokens += len(token_ids)
+
     def append_output(self, output) -> None:
         if isinstance(output, RequestOutput):
             self._update_num_prompt_tokens(output)
+            self._update_num_cached_tokens(output)
             output_token_ids = output.outputs[0].token_ids
             self._update_num_output_tokens(output_token_ids)
             self.parser = get_streamable_parser_for_assistant()
             for token_id in output_token_ids:
                 self.parser.process(token_id)
+                # Check if the current token is part of reasoning content
+                self._update_num_reasoning_tokens([token_id])
             output_msgs = self.parser.messages
         else:
             # Tool output.
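For reference, a minimal standalone sketch (illustration only, not part of the diff) of the check that `_update_num_reasoning_tokens` performs while the token loop above drives the harmony parser. It assumes a parser object like the one returned by `get_streamable_parser_for_assistant()`, i.e. one exposing `process()`, `current_channel`, and `current_recipient` as used in the diff; the helper name `count_reasoning_tokens` is hypothetical.

```python
# Sketch only: mirrors the reasoning-token check in the diff above.
# Assumes `parser` exposes process(), current_channel and current_recipient,
# like the streamable harmony parser used in the diff.
def count_reasoning_tokens(parser, token_ids):
    """Count tokens on the "analysis" channel or addressed to a
    python/browser tool while streaming token_ids through the parser."""
    num_reasoning = 0
    for tok in token_ids:
        parser.process(tok)
        is_analysis = parser.current_channel == "analysis"
        recipient = parser.current_recipient
        is_tool_call = recipient is not None and (
            recipient.startswith("python") or recipient.startswith("browser.")
        )
        if is_analysis or is_tool_call:
            num_reasoning += 1
    return num_reasoning
```

Tokens on the final channel fall through both checks, so only analysis-channel and tool-call content contributes to the count.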
@@ -204,6 +222,7 @@ def append_output(self, output) -> None:
             # so we only want to add the prompt tokens once for each message.
             if self.first_tok_of_message:
                 self._update_num_prompt_tokens(output)
+                self._update_num_cached_tokens(output)
             # Reset self.first_tok_of_message if needed:
             # if the current token is the last one of the current message
             # (finished=True), then the next token processed will mark the
@@ -212,6 +231,8 @@ def append_output(self, output) -> None:
             tok = output.outputs[0].token_ids[0]
             self.parser.process(tok)
             self._update_num_output_tokens(output.outputs[0].token_ids)
+            # Check if the current token is part of reasoning content
+            self._update_num_reasoning_tokens([tok])
             self.last_tok = tok
         else:
             # Handle the case of tool output in direct message format
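In this streaming path, `append_output` runs once per generated token, so prompt and cached token counts are accumulated only on the first token of each message, and `first_tok_of_message` is re-armed from `output.finished`. A minimal sketch of that gating, with a hypothetical `FakeOutput` standing in for vLLM's `RequestOutput`:

```python
# Sketch only (not part of the PR): per-token streaming bookkeeping.
# FakeOutput is a hypothetical stand-in for vLLM's RequestOutput.
from dataclasses import dataclass


@dataclass
class FakeOutput:
    prompt_token_ids: list[int]
    num_cached_tokens: int
    finished: bool


class StreamingCountersSketch:
    def __init__(self) -> None:
        self.first_tok_of_message = True
        self.num_prompt_tokens = 0
        self.num_cached_tokens = 0

    def on_token(self, output: FakeOutput) -> None:
        # Prompt/cached token counts describe the whole request, so add
        # them only once per message, not once per streamed token.
        if self.first_tok_of_message:
            self.num_prompt_tokens += len(output.prompt_token_ids)
            self.num_cached_tokens += output.num_cached_tokens
        # When the current message finishes, the next token starts a new
        # message, so re-arm the flag.
        self.first_tok_of_message = output.finished
```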