
Commit 7e5085d

Litellm dev 11 21 2024 (#6837)
* Fix Vertex AI function calling invoke: use JSON format instead of protobuf text format. (#6702)
* test: test tool_call conversion when arguments is empty dict. Fixes #6833
* fix(openai_like/handler.py): return more descriptive error message. Fixes #6812
* test: skip overloaded model
* docs(anthropic.md): update anthropic docs to show how to route to any new model
* feat(groq/): fake stream when 'response_format' param is passed. Groq doesn't support streaming when response_format is set
* feat(groq/): add response_format support for groq. Closes #6845
* fix(o1_handler.py): remove fake streaming for o1. Closes #6801
* build(model_prices_and_context_window.json): add groq llama3.2b model pricing. Closes #6807
* fix(utils.py): fix handling ollama response format param. Fixes #6848 (comment)
* docs(sidebars.js): refactor chat endpoint placement
* fix: fix linting errors
* test: fix test
* test: fix test
* fix(openai_like/handler): handle max retries
* fix(streaming_handler.py): fix streaming check for openai-compatible providers
* test: update test
* test: correctly handle model is overloaded error
* test: update test
* test: fix test
* test: mark flaky test

---------

Co-authored-by: Guowang Li <[email protected]>
1 parent a7d5536 · commit 7e5085d

31 files changed: +746 / -402 lines

docs/my-website/docs/embedding/supported_embedding.md (+1, -1)

@@ -1,7 +1,7 @@
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 
-# Embedding Models
+# Embeddings
 
 ## Quick Start
 ```python

docs/my-website/docs/image_generation.md (+1, -1)

@@ -1,4 +1,4 @@
-# Image Generation
+# Images
 
 ## Quick Start
 

docs/my-website/docs/providers/anthropic.md (+29, -17)

@@ -10,6 +10,35 @@ LiteLLM supports all anthropic models.
 - `claude-2.1`
 - `claude-instant-1.2`
 
+
+| Property | Details |
+|-------|-------|
+| Description | Claude is a highly performant, trustworthy, and intelligent AI platform built by Anthropic. Claude excels at tasks involving language, reasoning, analysis, coding, and more. |
+| Provider Route on LiteLLM | `anthropic/` (add this prefix to the model name, to route any requests to Anthropic - e.g. `anthropic/claude-3-5-sonnet-20240620`) |
+| Provider Doc | [Anthropic ↗](https://docs.anthropic.com/en/docs/build-with-claude/overview) |
+| API Endpoint for Provider | https://api.anthropic.com |
+| Supported Endpoints | `/chat/completions` |
+
+
+## Supported OpenAI Parameters
+
+Check this in code, [here](../completion/input.md#translated-openai-params)
+
+```
+"stream",
+"stop",
+"temperature",
+"top_p",
+"max_tokens",
+"max_completion_tokens",
+"tools",
+"tool_choice",
+"extra_headers",
+"parallel_tool_calls",
+"response_format",
+"user"
+```
+
 :::info
 
 Anthropic API fails requests when `max_tokens` are not passed. Due to this litellm passes `max_tokens=4096` when no `max_tokens` are passed.

@@ -1006,20 +1035,3 @@ curl http://0.0.0.0:4000/v1/chat/completions \
 
 </TabItem>
 </Tabs>
-
-## All Supported OpenAI Params
-
-```
-"stream",
-"stop",
-"temperature",
-"top_p",
-"max_tokens",
-"max_completion_tokens",
-"tools",
-"tool_choice",
-"extra_headers",
-"parallel_tool_calls",
-"response_format",
-"user"
-```
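
For readers skimming the diff, the new provider table and parameter list translate into calls like the one below. This is a minimal sketch, assuming an `ANTHROPIC_API_KEY` in the environment; the model name is the example from the table, and the keyword arguments are drawn from the supported-parameters list added above.

```python
import litellm

# Assumes ANTHROPIC_API_KEY is set in the environment.
# The `anthropic/` prefix routes the request to Anthropic, even for model
# names LiteLLM has not explicitly registered (per the new provider table).
response = litellm.completion(
    model="anthropic/claude-3-5-sonnet-20240620",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    # A few of the OpenAI params listed as supported above:
    max_tokens=256,
    temperature=0.2,
    stream=False,
)
print(response.choices[0].message.content)
```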

docs/my-website/sidebars.js (+40, -34)

@@ -199,46 +199,52 @@ const sidebars = {
 
        ],
      },
-      {
-        type: "category",
-        label: "Guides",
-        link: {
-          type: "generated-index",
-          title: "Chat Completions",
-          description: "Details on the completion() function",
-          slug: "/completion",
-        },
-        items: [
-          "completion/input",
-          "completion/provider_specific_params",
-          "completion/json_mode",
-          "completion/prompt_caching",
-          "completion/audio",
-          "completion/vision",
-          "completion/predict_outputs",
-          "completion/prefix",
-          "completion/drop_params",
-          "completion/prompt_formatting",
-          "completion/output",
-          "completion/usage",
-          "exception_mapping",
-          "completion/stream",
-          "completion/message_trimming",
-          "completion/function_call",
-          "completion/model_alias",
-          "completion/batching",
-          "completion/mock_requests",
-          "completion/reliable_completions",
-        ],
-      },
      {
        type: "category",
        label: "Supported Endpoints",
        items: [
+          {
+            type: "category",
+            label: "Chat",
+            link: {
+              type: "generated-index",
+              title: "Chat Completions",
+              description: "Details on the completion() function",
+              slug: "/completion",
+            },
+            items: [
+              "completion/input",
+              "completion/provider_specific_params",
+              "completion/json_mode",
+              "completion/prompt_caching",
+              "completion/audio",
+              "completion/vision",
+              "completion/predict_outputs",
+              "completion/prefix",
+              "completion/drop_params",
+              "completion/prompt_formatting",
+              "completion/output",
+              "completion/usage",
+              "exception_mapping",
+              "completion/stream",
+              "completion/message_trimming",
+              "completion/function_call",
+              "completion/model_alias",
+              "completion/batching",
+              "completion/mock_requests",
+              "completion/reliable_completions",
+            ],
+          },
          "embedding/supported_embedding",
          "image_generation",
-          "audio_transcription",
-          "text_to_speech",
+          {
+            type: "category",
+            label: "Audio",
+            "items": [
+              "audio_transcription",
+              "text_to_speech",
+            ]
+          },
          "rerank",
          "assistants",
          "batches",

litellm/litellm_core_utils/streaming_handler.py (+1, -1)

@@ -1793,7 +1793,7 @@ async def __anext__(self):  # noqa: PLR0915
             or self.custom_llm_provider == "bedrock"
             or self.custom_llm_provider == "triton"
             or self.custom_llm_provider == "watsonx"
-            or self.custom_llm_provider in litellm.openai_compatible_endpoints
+            or self.custom_llm_provider in litellm.openai_compatible_providers
             or self.custom_llm_provider in litellm._custom_providers
        ):
            async for chunk in self.completion_stream:
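
The one-line change above swaps the membership test from `litellm.openai_compatible_endpoints` to `litellm.openai_compatible_providers`. Since `self.custom_llm_provider` holds a provider name (e.g. `"groq"`) rather than an endpoint URL, only the provider list can match it, which is what the "fix streaming check for openai-compatible providers" entry in the commit message refers to. A small illustrative check, assuming both lists are exposed at the top level of the `litellm` package as the diff implies:

```python
import litellm

# Illustrative only (not part of the commit): custom_llm_provider holds a
# provider name such as "groq", so it must be looked up in the provider list.
provider = "groq"
print(provider in litellm.openai_compatible_providers)  # expected: True
print(provider in litellm.openai_compatible_endpoints)  # expected: False, assuming this list holds endpoint URLs
```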

litellm/llms/OpenAI/chat/o1_handler.py (+2, -34)

@@ -17,22 +17,6 @@
 
 class OpenAIO1ChatCompletion(OpenAIChatCompletion):
 
-    async def mock_async_streaming(
-        self,
-        response: Any,
-        model: Optional[str],
-        logging_obj: Any,
-    ):
-        model_response = await response
-        completion_stream = MockResponseIterator(model_response=model_response)
-        streaming_response = CustomStreamWrapper(
-            completion_stream=completion_stream,
-            model=model,
-            custom_llm_provider="openai",
-            logging_obj=logging_obj,
-        )
-        return streaming_response
-
    def completion(
        self,
        model_response: ModelResponse,

@@ -54,7 +38,7 @@ def completion(
        custom_llm_provider: Optional[str] = None,
        drop_params: Optional[bool] = None,
    ):
-        stream: Optional[bool] = optional_params.pop("stream", False)
+        # stream: Optional[bool] = optional_params.pop("stream", False)
        response = super().completion(
            model_response,
            timeout,

@@ -76,20 +60,4 @@ def completion(
            drop_params,
        )
 
-        if stream is True:
-            if asyncio.iscoroutine(response):
-                return self.mock_async_streaming(
-                    response=response, model=model, logging_obj=logging_obj  # type: ignore
-                )
-
-            completion_stream = MockResponseIterator(model_response=response)
-            streaming_response = CustomStreamWrapper(
-                completion_stream=completion_stream,
-                model=model,
-                custom_llm_provider="openai",
-                logging_obj=logging_obj,
-            )
-
-            return streaming_response
-        else:
-            return response
+        return response
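
With the mock-streaming helper and the `stream` branch removed, `OpenAIO1ChatCompletion.completion` now returns whatever the parent `OpenAIChatCompletion.completion` produces, so o1 requests with `stream=True` go through the regular streaming path instead of a fake stream replayed from a full response. A caller-level sketch of the affected case, assuming an `OPENAI_API_KEY` in the environment and the usual OpenAI-style chunk shape; the model name is only an example:

```python
import litellm

# Requires OPENAI_API_KEY in the environment; "o1-preview" is an example model.
response = litellm.completion(
    model="o1-preview",
    messages=[{"role": "user", "content": "Explain streaming in one line."}],
    stream=True,  # no longer intercepted and faked inside o1_handler.py
)

for chunk in response:
    # Some chunks carry no content delta, so guard against None.
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="")
```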

litellm/llms/groq/chat/handler.py (+46, -33)

@@ -6,55 +6,68 @@
 
 from httpx._config import Timeout
 
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+from litellm.types.utils import CustomStreamingDecoder
 from litellm.utils import ModelResponse
 
 from ...groq.chat.transformation import GroqChatConfig
-from ...OpenAI.openai import OpenAIChatCompletion
+from ...openai_like.chat.handler import OpenAILikeChatHandler
 
 
-class GroqChatCompletion(OpenAIChatCompletion):
+class GroqChatCompletion(OpenAILikeChatHandler):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
 
    def completion(
        self,
+        *,
+        model: str,
+        messages: list,
+        api_base: str,
+        custom_llm_provider: str,
+        custom_prompt_dict: dict,
        model_response: ModelResponse,
-        timeout: Union[float, Timeout],
+        print_verbose: Callable,
+        encoding,
+        api_key: Optional[str],
+        logging_obj,
        optional_params: dict,
-        logging_obj: Any,
-        model: Optional[str] = None,
-        messages: Optional[list] = None,
-        print_verbose: Optional[Callable[..., Any]] = None,
-        api_key: Optional[str] = None,
-        api_base: Optional[str] = None,
-        acompletion: bool = False,
+        acompletion=None,
        litellm_params=None,
        logger_fn=None,
        headers: Optional[dict] = None,
-        custom_prompt_dict: dict = {},
-        client=None,
-        organization: Optional[str] = None,
-        custom_llm_provider: Optional[str] = None,
-        drop_params: Optional[bool] = None,
+        timeout: Optional[Union[float, Timeout]] = None,
+        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+        custom_endpoint: Optional[bool] = None,
+        streaming_decoder: Optional[CustomStreamingDecoder] = None,
+        fake_stream: bool = False
    ):
        messages = GroqChatConfig()._transform_messages(messages)  # type: ignore
+
+        if optional_params.get("stream") is True:
+            fake_stream = GroqChatConfig()._should_fake_stream(optional_params)
+        else:
+            fake_stream = False
+
        return super().completion(
-            model_response,
-            timeout,
-            optional_params,
-            logging_obj,
-            model,
-            messages,
-            print_verbose,
-            api_key,
-            api_base,
-            acompletion,
-            litellm_params,
-            logger_fn,
-            headers,
-            custom_prompt_dict,
-            client,
-            organization,
-            custom_llm_provider,
-            drop_params,
+            model=model,
+            messages=messages,
+            api_base=api_base,
+            custom_llm_provider=custom_llm_provider,
+            custom_prompt_dict=custom_prompt_dict,
+            model_response=model_response,
+            print_verbose=print_verbose,
+            encoding=encoding,
+            api_key=api_key,
+            logging_obj=logging_obj,
+            optional_params=optional_params,
+            acompletion=acompletion,
+            litellm_params=litellm_params,
+            logger_fn=logger_fn,
+            headers=headers,
+            timeout=timeout,
+            client=client,
+            custom_endpoint=custom_endpoint,
+            streaming_decoder=streaming_decoder,
+            fake_stream=fake_stream,
        )
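
Per the commit message, Groq does not support streaming when `response_format` is set, so the rewritten handler asks `GroqChatConfig()._should_fake_stream(optional_params)` whether to fall back to a fake stream, which the `OpenAILikeChatHandler` parent presumably builds by replaying a single non-streaming response as chunks. A caller-level sketch of the scenario this covers, assuming a `GROQ_API_KEY` in the environment; the model name and prompt are placeholders:

```python
import litellm

# Requires GROQ_API_KEY in the environment; the model name is an example.
response = litellm.completion(
    model="groq/llama3-8b-8192",
    messages=[{"role": "user", "content": "Return a JSON object with a 'greeting' key."}],
    response_format={"type": "json_object"},  # newly supported for Groq in this commit
    stream=True,  # Groq can't stream with response_format set, so LiteLLM fakes the stream
)

for chunk in response:
    # Iterate the (possibly faked) stream exactly like a real one.
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="")
```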
