
Commit 23051d8

fix(streaming_handler.py): fix completion start time tracking (#9688)
* fix(streaming_handler.py): fix completion start time tracking. Fixes #9210
* feat(anthropic/chat/transformation.py): map openai 'reasoning_effort' to anthropic 'thinking' param. Fixes #9022
* feat: map 'reasoning_effort' to 'thinking' param across bedrock + vertex. Closes #9022 (comment)
1 parent 0690f7a commit 23051d8

File tree: 9 files changed, +135 / -11 lines changed
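In user-facing terms, the second and third changes let an OpenAI-style `reasoning_effort` argument drive Anthropic's `thinking` parameter. A minimal sketch of the resulting mapping, assuming `get_optional_params` is imported from `litellm.utils` as in the updated test file (the model name is illustrative):

from litellm.utils import get_optional_params

# reasoning_effort="high" maps to a 4096-token thinking budget
# (low -> 1024, medium -> 2048, per the AnthropicConfig change below).
optional_params = get_optional_params(
    model="claude-3-7-sonnet-20250219",
    custom_llm_provider="anthropic",
    reasoning_effort="high",
)
assert optional_params["thinking"] == {"type": "enabled", "budget_tokens": 4096}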

litellm/litellm_core_utils/litellm_logging.py (+5)

@@ -290,6 +290,7 @@ def __init__(
             "input": _input,
             "litellm_params": litellm_params,
             "applied_guardrails": applied_guardrails,
+            "model": model,
         }
 
     def process_dynamic_callbacks(self):
@@ -1010,6 +1011,10 @@ def should_run_callback(
             return False
         return True
 
+    def _update_completion_start_time(self, completion_start_time: datetime.datetime):
+        self.completion_start_time = completion_start_time
+        self.model_call_details["completion_start_time"] = self.completion_start_time
+
     def _success_handler_helper_fn(
         self,
         result=None,

litellm/litellm_core_utils/streaming_handler.py (+10)

@@ -1,5 +1,6 @@
 import asyncio
 import collections.abc
+import datetime
 import json
 import threading
 import time
@@ -1567,6 +1568,10 @@ def __next__(self):  # noqa: PLR0915
 
                 if response is None:
                     continue
+                if self.logging_obj.completion_start_time is None:
+                    self.logging_obj._update_completion_start_time(
+                        completion_start_time=datetime.datetime.now()
+                    )
                 ## LOGGING
                 executor.submit(
                     self.run_success_logging_and_cache_storage,
@@ -1721,6 +1726,11 @@ async def __anext__(self):  # noqa: PLR0915
                 if processed_chunk is None:
                     continue
 
+                if self.logging_obj.completion_start_time is None:
+                    self.logging_obj._update_completion_start_time(
+                        completion_start_time=datetime.datetime.now()
+                    )
+
                 choice = processed_chunk.choices[0]
                 if isinstance(choice, StreamingChoices):
                     self.response_uptil_now += choice.delta.get("content", "") or ""
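With the timestamp now recorded on the first yielded chunk in both the sync and async iterators, downstream callbacks can derive time-to-first-token. A hedged sketch of that calculation, assuming both timestamps end up in model_call_details as datetime objects (the helper itself is illustrative, not part of the commit):

import datetime

def time_to_first_token(model_call_details: dict) -> datetime.timedelta:
    # completion_start_time is set once, on the first streamed chunk, so
    # its offset from the request start time approximates time-to-first-token.
    return (
        model_call_details["completion_start_time"]
        - model_call_details["start_time"]
    )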

litellm/llms/anthropic/chat/transformation.py (+26, -4)

@@ -18,8 +18,10 @@
     AnthropicMessagesTool,
     AnthropicMessagesToolChoice,
     AnthropicSystemMessageContent,
+    AnthropicThinkingParam,
 )
 from litellm.types.llms.openai import (
+    REASONING_EFFORT,
     AllMessageValues,
     ChatCompletionCachedContent,
     ChatCompletionSystemMessage,
@@ -94,6 +96,7 @@ def get_supported_openai_params(self, model: str):
             "parallel_tool_calls",
             "response_format",
             "user",
+            "reasoning_effort",
         ]
 
         if "claude-3-7-sonnet" in model:
@@ -291,6 +294,21 @@ def _map_stop_sequences(
                 new_stop = new_v
         return new_stop
 
+    @staticmethod
+    def _map_reasoning_effort(
+        reasoning_effort: Optional[Union[REASONING_EFFORT, str]]
+    ) -> Optional[AnthropicThinkingParam]:
+        if reasoning_effort is None:
+            return None
+        elif reasoning_effort == "low":
+            return AnthropicThinkingParam(type="enabled", budget_tokens=1024)
+        elif reasoning_effort == "medium":
+            return AnthropicThinkingParam(type="enabled", budget_tokens=2048)
+        elif reasoning_effort == "high":
+            return AnthropicThinkingParam(type="enabled", budget_tokens=4096)
+        else:
+            raise ValueError(f"Unmapped reasoning effort: {reasoning_effort}")
+
     def map_openai_params(
         self,
         non_default_params: dict,
@@ -302,10 +320,6 @@ def map_openai_params(
             non_default_params=non_default_params
         )
 
-        ## handle thinking tokens
-        self.update_optional_params_with_thinking_tokens(
-            non_default_params=non_default_params, optional_params=optional_params
-        )
         for param, value in non_default_params.items():
             if param == "max_tokens":
                 optional_params["max_tokens"] = value
@@ -370,7 +384,15 @@ def map_openai_params(
                 optional_params["metadata"] = {"user_id": value}
             if param == "thinking":
                 optional_params["thinking"] = value
+            elif param == "reasoning_effort" and isinstance(value, str):
+                optional_params["thinking"] = AnthropicConfig._map_reasoning_effort(
+                    value
+                )
 
+        ## handle thinking tokens
+        self.update_optional_params_with_thinking_tokens(
+            non_default_params=non_default_params, optional_params=optional_params
+        )
         return optional_params
 
     def _create_json_tool_call_for_response_format(
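Taken on its own, the new static helper is a three-way lookup. A quick sketch of its behavior, assuming AnthropicThinkingParam is a TypedDict (which is what the dict-equality assertion in the new test suggests):

from litellm.llms.anthropic.chat.transformation import AnthropicConfig

assert AnthropicConfig._map_reasoning_effort("low") == {
    "type": "enabled",
    "budget_tokens": 1024,
}
assert AnthropicConfig._map_reasoning_effort(None) is None
# Any other string raises ValueError("Unmapped reasoning effort: ...").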

litellm/llms/base_llm/chat/transformation.py (+6, -3)

@@ -104,7 +104,10 @@ def get_json_schema_from_pydantic_object(
         return type_to_response_format_param(response_format=response_format)
 
     def is_thinking_enabled(self, non_default_params: dict) -> bool:
-        return non_default_params.get("thinking", {}).get("type", None) == "enabled"
+        return (
+            non_default_params.get("thinking", {}).get("type") == "enabled"
+            or non_default_params.get("reasoning_effort") is not None
+        )
 
     def update_optional_params_with_thinking_tokens(
         self, non_default_params: dict, optional_params: dict
@@ -116,9 +119,9 @@ def update_optional_params_with_thinking_tokens(
 
         if 'thinking' is enabled and 'max_tokens' is not specified, set 'max_tokens' to the thinking token budget + DEFAULT_MAX_TOKENS
         """
-        is_thinking_enabled = self.is_thinking_enabled(non_default_params)
+        is_thinking_enabled = self.is_thinking_enabled(optional_params)
         if is_thinking_enabled and "max_tokens" not in non_default_params:
-            thinking_token_budget = cast(dict, non_default_params["thinking"]).get(
+            thinking_token_budget = cast(dict, optional_params["thinking"]).get(
                 "budget_tokens", None
             )
             if thinking_token_budget is not None:
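The switch from non_default_params to optional_params matters for ordering: when the caller sends only reasoning_effort, the thinking entry does not exist until map_openai_params has translated it into optional_params, so reading the raw input would miss it. A simplified sketch of the new flow (the dict contents and the DEFAULT_MAX_TOKENS value are illustrative):

non_default_params = {"reasoning_effort": "medium"}  # what the caller sent
optional_params = {}  # what the provider mapping builds

# Step 1: the provider loop translates reasoning_effort -> thinking.
optional_params["thinking"] = {"type": "enabled", "budget_tokens": 2048}

# Step 2: only now can a max_tokens default be derived from the budget,
# which is why the helper runs after the loop and reads optional_params.
DEFAULT_MAX_TOKENS = 4096  # illustrative constant
if "max_tokens" not in non_default_params:
    budget = optional_params["thinking"].get("budget_tokens")
    if budget is not None:
        optional_params["max_tokens"] = budget + DEFAULT_MAX_TOKENS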

litellm/llms/bedrock/chat/converse_transformation.py (+11, -3)

@@ -17,6 +17,7 @@
     _bedrock_converse_messages_pt,
     _bedrock_tools_pt,
 )
+from litellm.llms.anthropic.chat.transformation import AnthropicConfig
 from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
 from litellm.types.llms.bedrock import *
 from litellm.types.llms.openai import (
@@ -128,6 +129,7 @@ def get_supported_openai_params(self, model: str) -> List[str]:
             "claude-3-7" in model
         ):  # [TODO]: move to a 'supports_reasoning_content' param from model cost map
             supported_params.append("thinking")
+            supported_params.append("reasoning_effort")
         return supported_params
 
     def map_tool_choice_values(
@@ -218,9 +220,7 @@ def map_openai_params(
         messages: Optional[List[AllMessageValues]] = None,
     ) -> dict:
         is_thinking_enabled = self.is_thinking_enabled(non_default_params)
-        self.update_optional_params_with_thinking_tokens(
-            non_default_params=non_default_params, optional_params=optional_params
-        )
+
         for param, value in non_default_params.items():
             if param == "response_format" and isinstance(value, dict):
                 ignore_response_format_types = ["text"]
@@ -297,6 +297,14 @@ def map_openai_params(
                 optional_params["tool_choice"] = _tool_choice_value
             if param == "thinking":
                 optional_params["thinking"] = value
+            elif param == "reasoning_effort" and isinstance(value, str):
+                optional_params["thinking"] = AnthropicConfig._map_reasoning_effort(
+                    value
+                )
+
+        self.update_optional_params_with_thinking_tokens(
+            non_default_params=non_default_params, optional_params=optional_params
+        )
 
         return optional_params
 

litellm/types/llms/openai.py (+3)

@@ -1113,3 +1113,6 @@ class ErrorEvent(BaseLiteLLMOpenAIResponseObject):
     ],
     Discriminator("type"),
 ]
+
+
+REASONING_EFFORT = Literal["low", "medium", "high"]

litellm/utils.py (+6, -1)

@@ -5901,9 +5901,10 @@ async def __anext__(self):
 
 
 class ModelResponseListIterator:
-    def __init__(self, model_responses):
+    def __init__(self, model_responses, delay: Optional[float] = None):
         self.model_responses = model_responses
         self.index = 0
+        self.delay = delay
 
     # Sync iterator
     def __iter__(self):
@@ -5914,6 +5915,8 @@ def __next__(self):
             raise StopIteration
         model_response = self.model_responses[self.index]
         self.index += 1
+        if self.delay:
+            time.sleep(self.delay)
         return model_response
 
     # Async iterator
@@ -5925,6 +5928,8 @@ async def __anext__(self):
             raise StopAsyncIteration
         model_response = self.model_responses[self.index]
         self.index += 1
+        if self.delay:
+            await asyncio.sleep(self.delay)
         return model_response
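The new delay knob lets tests simulate per-chunk network latency, opening a measurable gap between the request start and the first chunk. A usage sketch mirroring the new streaming test below:

from litellm.utils import ModelResponseListIterator

chunks = []  # stands in for a list of ModelResponseStream objects,
             # e.g. the bedrock_chunks fixture in the test file
stream = ModelResponseListIterator(model_responses=chunks, delay=0.1)
for chunk in stream:
    ...  # each chunk is yielded after a ~0.1 s pause (sync path);
    # the async iterator awaits asyncio.sleep(0.1) instead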

tests/litellm/litellm_core_utils/test_streaming_handler.py (+51)

@@ -1,6 +1,7 @@
 import json
 import os
 import sys
+import time
 from unittest.mock import MagicMock, Mock, patch
 
 import pytest
@@ -19,6 +20,7 @@
     Delta,
     ModelResponseStream,
     PromptTokensDetailsWrapper,
+    StandardLoggingPayload,
     StreamingChoices,
     Usage,
 )
@@ -36,6 +38,22 @@ def initialized_custom_stream_wrapper() -> CustomStreamWrapper:
     return streaming_handler
 
 
+@pytest.fixture
+def logging_obj() -> Logging:
+    import time
+
+    logging_obj = Logging(
+        model="my-random-model",
+        messages=[{"role": "user", "content": "Hey"}],
+        stream=True,
+        call_type="completion",
+        start_time=time.time(),
+        litellm_call_id="12345",
+        function_id="1245",
+    )
+    return logging_obj
+
+
 bedrock_chunks = [
     ModelResponseStream(
         id="chatcmpl-d249def8-a78b-464c-87b5-3a6f43565292",
@@ -577,3 +595,36 @@ def test_streaming_handler_with_stop_chunk(
         **args, model_response=ModelResponseStream()
     )
     assert returned_chunk is None
+
+
+@pytest.mark.asyncio
+async def test_streaming_completion_start_time(logging_obj: Logging):
+    """Test that the start time is set correctly"""
+    from litellm.integrations.custom_logger import CustomLogger
+
+    class MockCallback(CustomLogger):
+        pass
+
+    mock_callback = MockCallback()
+    litellm.success_callback = [mock_callback, "langfuse"]
+
+    completion_stream = ModelResponseListIterator(
+        model_responses=bedrock_chunks, delay=0.1
+    )
+
+    response = CustomStreamWrapper(
+        completion_stream=completion_stream,
+        model="bedrock/claude-3-5-sonnet-20240620-v1:0",
+        logging_obj=logging_obj,
+    )
+
+    async for chunk in response:
+        print(chunk)
+
+    await asyncio.sleep(2)
+
+    assert logging_obj.model_call_details["completion_start_time"] is not None
+    assert (
+        logging_obj.model_call_details["completion_start_time"]
+        < logging_obj.model_call_details["end_time"]
+    )

tests/llm_translation/test_optional_params.py (+17)

@@ -1379,3 +1379,20 @@ def test_azure_modalities_param():
     )
     assert optional_params["modalities"] == ["text", "audio"]
     assert optional_params["audio"] == {"type": "audio_input", "input": "test.wav"}
+
+@pytest.mark.parametrize(
+    "model, provider",
+    [
+        ("claude-3-7-sonnet-20240620-v1:0", "anthropic"),
+        ("anthropic.claude-3-7-sonnet-20250219-v1:0", "bedrock"),
+        ("invoke/anthropic.claude-3-7-sonnet-20240620-v1:0", "bedrock"),
+        ("claude-3-7-sonnet@20250219", "vertex_ai"),
+    ],
+)
+def test_anthropic_unified_reasoning_content(model, provider):
+    optional_params = get_optional_params(
+        model=model,
+        custom_llm_provider=provider,
+        reasoning_effort="high",
+    )
+    assert optional_params["thinking"] == {"type": "enabled", "budget_tokens": 4096}
