
Commit 7e5085d

Litellm dev 11 21 2024 (#6837)
* Fix Vertex AI function calling invoke: use JSON format instead of protobuf text format. (#6702)
* test: test tool_call conversion when arguments is empty dict. Fixes #6833
* fix(openai_like/handler.py): return more descriptive error message. Fixes #6812
* test: skip overloaded model
* docs(anthropic.md): update anthropic docs to show how to route to any new model
* feat(groq/): fake stream when 'response_format' param is passed. Groq doesn't support streaming when response_format is set
* feat(groq/): add response_format support for groq. Closes #6845
* fix(o1_handler.py): remove fake streaming for o1. Closes #6801
* build(model_prices_and_context_window.json): add groq llama3.2b model pricing. Closes #6807
* fix(utils.py): fix handling ollama response format param. Fixes #6848 (comment)
* docs(sidebars.js): refactor chat endpoint placement
* fix: fix linting errors
* test: fix test
* test: fix test
* fix(openai_like/handler): handle max retries
* fix(streaming_handler.py): fix streaming check for openai-compatible providers
* test: update test
* test: correctly handle model is overloaded error
* test: update test
* test: fix test
* test: mark flaky test

---------

Co-authored-by: Guowang Li <[email protected]>
1 parent a7d5536 · commit 7e5085d

31 files changed: +746 / -402 lines

docs/my-website/docs/embedding/supported_embedding.md (+1, -1)

@@ -1,7 +1,7 @@
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 
-# Embedding Models
+# Embeddings
 
 ## Quick Start
 ```python

docs/my-website/docs/image_generation.md (+1, -1)

@@ -1,4 +1,4 @@
-# Image Generation
+# Images
 
 ## Quick Start
 

docs/my-website/docs/providers/anthropic.md (+29, -17)

@@ -10,6 +10,35 @@ LiteLLM supports all anthropic models.
 - `claude-2.1`
 - `claude-instant-1.2`
 
+
+| Property | Details |
+|-------|-------|
+| Description | Claude is a highly performant, trustworthy, and intelligent AI platform built by Anthropic. Claude excels at tasks involving language, reasoning, analysis, coding, and more. |
+| Provider Route on LiteLLM | `anthropic/` (add this prefix to the model name, to route any requests to Anthropic - e.g. `anthropic/claude-3-5-sonnet-20240620`) |
+| Provider Doc | [Anthropic ↗](https://docs.anthropic.com/en/docs/build-with-claude/overview) |
+| API Endpoint for Provider | https://api.anthropic.com |
+| Supported Endpoints | `/chat/completions` |
+
+
+## Supported OpenAI Parameters
+
+Check this in code, [here](../completion/input.md#translated-openai-params)
+
+```
+"stream",
+"stop",
+"temperature",
+"top_p",
+"max_tokens",
+"max_completion_tokens",
+"tools",
+"tool_choice",
+"extra_headers",
+"parallel_tool_calls",
+"response_format",
+"user"
+```
+
 :::info
 
 Anthropic API fails requests when `max_tokens` are not passed. Due to this litellm passes `max_tokens=4096` when no `max_tokens` are passed.

@@ -1006,20 +1035,3 @@ curl http://0.0.0.0:4000/v1/chat/completions \
 
 </TabItem>
 </Tabs>
-
-## All Supported OpenAI Params
-
-```
-"stream",
-"stop",
-"temperature",
-"top_p",
-"max_tokens",
-"max_completion_tokens",
-"tools",
-"tool_choice",
-"extra_headers",
-"parallel_tool_calls",
-"response_format",
-"user"
-```
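
For readers skimming the diff, the new provider table and parameter list translate into calls like the one below. This is a minimal sketch, assuming an `ANTHROPIC_API_KEY` in the environment; the model name is the example from the table, and the keyword arguments are drawn from the supported-parameters list added above.

```python
import litellm

# Assumes ANTHROPIC_API_KEY is set in the environment.
# The `anthropic/` prefix routes the request to Anthropic, even for model
# names LiteLLM has not explicitly registered (per the new provider table).
response = litellm.completion(
    model="anthropic/claude-3-5-sonnet-20240620",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    # A few of the OpenAI params listed as supported above:
    max_tokens=256,
    temperature=0.2,
    stream=False,
)
print(response.choices[0].message.content)
```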

docs/my-website/sidebars.js (+40, -34)

@@ -199,46 +199,52 @@ const sidebars = {
 
        ],
      },
-      {
-        type: "category",
-        label: "Guides",
-        link: {
-          type: "generated-index",
-          title: "Chat Completions",
-          description: "Details on the completion() function",
-          slug: "/completion",
-        },
-        items: [
-          "completion/input",
-          "completion/provider_specific_params",
-          "completion/json_mode",
-          "completion/prompt_caching",
-          "completion/audio",
-          "completion/vision",
-          "completion/predict_outputs",
-          "completion/prefix",
-          "completion/drop_params",
-          "completion/prompt_formatting",
-          "completion/output",
-          "completion/usage",
-          "exception_mapping",
-          "completion/stream",
-          "completion/message_trimming",
-          "completion/function_call",
-          "completion/model_alias",
-          "completion/batching",
-          "completion/mock_requests",
-          "completion/reliable_completions",
-        ],
-      },
      {
        type: "category",
        label: "Supported Endpoints",
        items: [
+          {
+            type: "category",
+            label: "Chat",
+            link: {
+              type: "generated-index",
+              title: "Chat Completions",
+              description: "Details on the completion() function",
+              slug: "/completion",
+            },
+            items: [
+              "completion/input",
+              "completion/provider_specific_params",
+              "completion/json_mode",
+              "completion/prompt_caching",
+              "completion/audio",
+              "completion/vision",
+              "completion/predict_outputs",
+              "completion/prefix",
+              "completion/drop_params",
+              "completion/prompt_formatting",
+              "completion/output",
+              "completion/usage",
+              "exception_mapping",
+              "completion/stream",
+              "completion/message_trimming",
+              "completion/function_call",
+              "completion/model_alias",
+              "completion/batching",
+              "completion/mock_requests",
+              "completion/reliable_completions",
+            ],
+          },
          "embedding/supported_embedding",
          "image_generation",
-          "audio_transcription",
-          "text_to_speech",
+          {
+            type: "category",
+            label: "Audio",
+            "items": [
+              "audio_transcription",
+              "text_to_speech",
+            ]
+          },
          "rerank",
          "assistants",
          "batches",

litellm/litellm_core_utils/streaming_handler.py (+1, -1)

@@ -1793,7 +1793,7 @@ async def __anext__(self):  # noqa: PLR0915
             or self.custom_llm_provider == "bedrock"
             or self.custom_llm_provider == "triton"
             or self.custom_llm_provider == "watsonx"
-            or self.custom_llm_provider in litellm.openai_compatible_endpoints
+            or self.custom_llm_provider in litellm.openai_compatible_providers
             or self.custom_llm_provider in litellm._custom_providers
        ):
            async for chunk in self.completion_stream:
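
The one-line change above swaps the membership test from `litellm.openai_compatible_endpoints` to `litellm.openai_compatible_providers`. Since `self.custom_llm_provider` holds a provider name (e.g. `"groq"`) rather than an endpoint URL, only the provider list can match it, which is what the "fix streaming check for openai-compatible providers" entry in the commit message refers to. A small illustrative check, assuming both lists are exposed at the top level of the `litellm` package as the diff implies:

```python
import litellm

# Illustrative only (not part of the commit): custom_llm_provider holds a
# provider name such as "groq", so it must be looked up in the provider list.
provider = "groq"
print(provider in litellm.openai_compatible_providers)  # expected: True
print(provider in litellm.openai_compatible_endpoints)  # expected: False, assuming this list holds endpoint URLs
```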

litellm/llms/OpenAI/chat/o1_handler.py (+2, -34)

@@ -17,22 +17,6 @@
 
 class OpenAIO1ChatCompletion(OpenAIChatCompletion):
 
-    async def mock_async_streaming(
-        self,
-        response: Any,
-        model: Optional[str],
-        logging_obj: Any,
-    ):
-        model_response = await response
-        completion_stream = MockResponseIterator(model_response=model_response)
-        streaming_response = CustomStreamWrapper(
-            completion_stream=completion_stream,
-            model=model,
-            custom_llm_provider="openai",
-            logging_obj=logging_obj,
-        )
-        return streaming_response
-
    def completion(
        self,
        model_response: ModelResponse,

@@ -54,7 +38,7 @@ def completion(
        custom_llm_provider: Optional[str] = None,
        drop_params: Optional[bool] = None,
    ):
-        stream: Optional[bool] = optional_params.pop("stream", False)
+        # stream: Optional[bool] = optional_params.pop("stream", False)
        response = super().completion(
            model_response,
            timeout,

@@ -76,20 +60,4 @@ def completion(
            drop_params,
        )
 
-        if stream is True:
-            if asyncio.iscoroutine(response):
-                return self.mock_async_streaming(
-                    response=response, model=model, logging_obj=logging_obj  # type: ignore
-                )
-
-            completion_stream = MockResponseIterator(model_response=response)
-            streaming_response = CustomStreamWrapper(
-                completion_stream=completion_stream,
-                model=model,
-                custom_llm_provider="openai",
-                logging_obj=logging_obj,
-            )
-
-            return streaming_response
-        else:
-            return response
+        return response
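
With the mock-streaming helper and the `stream` branch removed, `OpenAIO1ChatCompletion.completion` now returns whatever the parent `OpenAIChatCompletion.completion` produces, so o1 requests with `stream=True` go through the regular streaming path instead of a fake stream replayed from a full response. A caller-level sketch of the affected case, assuming an `OPENAI_API_KEY` in the environment and the usual OpenAI-style chunk shape; the model name is only an example:

```python
import litellm

# Requires OPENAI_API_KEY in the environment; "o1-preview" is an example model.
response = litellm.completion(
    model="o1-preview",
    messages=[{"role": "user", "content": "Explain streaming in one line."}],
    stream=True,  # no longer intercepted and faked inside o1_handler.py
)

for chunk in response:
    # Some chunks carry no content delta, so guard against None.
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="")
```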

litellm/llms/groq/chat/handler.py (+46, -33)

@@ -6,55 +6,68 @@
 
 from httpx._config import Timeout
 
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+from litellm.types.utils import CustomStreamingDecoder
 from litellm.utils import ModelResponse
 
 from ...groq.chat.transformation import GroqChatConfig
-from ...OpenAI.openai import OpenAIChatCompletion
+from ...openai_like.chat.handler import OpenAILikeChatHandler
 
 
-class GroqChatCompletion(OpenAIChatCompletion):
+class GroqChatCompletion(OpenAILikeChatHandler):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
 
    def completion(
        self,
+        *,
+        model: str,
+        messages: list,
+        api_base: str,
+        custom_llm_provider: str,
+        custom_prompt_dict: dict,
        model_response: ModelResponse,
-        timeout: Union[float, Timeout],
+        print_verbose: Callable,
+        encoding,
+        api_key: Optional[str],
+        logging_obj,
        optional_params: dict,
-        logging_obj: Any,
-        model: Optional[str] = None,
-        messages: Optional[list] = None,
-        print_verbose: Optional[Callable[..., Any]] = None,
-        api_key: Optional[str] = None,
-        api_base: Optional[str] = None,
-        acompletion: bool = False,
+        acompletion=None,
        litellm_params=None,
        logger_fn=None,
        headers: Optional[dict] = None,
-        custom_prompt_dict: dict = {},
-        client=None,
-        organization: Optional[str] = None,
-        custom_llm_provider: Optional[str] = None,
-        drop_params: Optional[bool] = None,
+        timeout: Optional[Union[float, Timeout]] = None,
+        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+        custom_endpoint: Optional[bool] = None,
+        streaming_decoder: Optional[CustomStreamingDecoder] = None,
+        fake_stream: bool = False
    ):
        messages = GroqChatConfig()._transform_messages(messages)  # type: ignore
+
+        if optional_params.get("stream") is True:
+            fake_stream = GroqChatConfig()._should_fake_stream(optional_params)
+        else:
+            fake_stream = False
+
        return super().completion(
-            model_response,
-            timeout,
-            optional_params,
-            logging_obj,
-            model,
-            messages,
-            print_verbose,
-            api_key,
-            api_base,
-            acompletion,
-            litellm_params,
-            logger_fn,
-            headers,
-            custom_prompt_dict,
-            client,
-            organization,
-            custom_llm_provider,
-            drop_params,
+            model=model,
+            messages=messages,
+            api_base=api_base,
+            custom_llm_provider=custom_llm_provider,
+            custom_prompt_dict=custom_prompt_dict,
+            model_response=model_response,
+            print_verbose=print_verbose,
+            encoding=encoding,
+            api_key=api_key,
+            logging_obj=logging_obj,
+            optional_params=optional_params,
+            acompletion=acompletion,
+            litellm_params=litellm_params,
+            logger_fn=logger_fn,
+            headers=headers,
+            timeout=timeout,
+            client=client,
+            custom_endpoint=custom_endpoint,
+            streaming_decoder=streaming_decoder,
+            fake_stream=fake_stream,
        )
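
Per the commit message, Groq does not support streaming when `response_format` is set, so the rewritten handler asks `GroqChatConfig()._should_fake_stream(optional_params)` whether to fall back to a fake stream, which the `OpenAILikeChatHandler` parent presumably builds by replaying a single non-streaming response as chunks. A caller-level sketch of the scenario this covers, assuming a `GROQ_API_KEY` in the environment; the model name and prompt are placeholders:

```python
import litellm

# Requires GROQ_API_KEY in the environment; the model name is an example.
response = litellm.completion(
    model="groq/llama3-8b-8192",
    messages=[{"role": "user", "content": "Return a JSON object with a 'greeting' key."}],
    response_format={"type": "json_object"},  # newly supported for Groq in this commit
    stream=True,  # Groq can't stream with response_format set, so LiteLLM fakes the stream
)

for chunk in response:
    # Iterate the (possibly faked) stream exactly like a real one.
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="")
```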
