diff --git a/docs/my-website/docs/integrations/index.md b/docs/my-website/docs/integrations/index.md index 95c922cce89..eff0daa7f5f 100644 --- a/docs/my-website/docs/integrations/index.md +++ b/docs/my-website/docs/integrations/index.md @@ -3,6 +3,7 @@ This section covers integrations with various tools and services that can be used with LiteLLM (either Proxy or SDK). ## AI Agent Frameworks +- **[AgentField](../tutorials/agentfield.md)** - Open-source control plane for building and orchestrating autonomous AI agents - **[Letta](./letta.md)** - Build stateful LLM agents with persistent memory using LiteLLM Proxy ## Development Tools @@ -15,4 +16,4 @@ This section covers integrations with various tools and services that can be use - **[Datadog](../observability/datadog.md)** -Click into each section to learn more about the integrations. \ No newline at end of file +Click into each section to learn more about the integrations. diff --git a/docs/my-website/docs/tutorials/agentfield.md b/docs/my-website/docs/tutorials/agentfield.md new file mode 100644 index 00000000000..739e4f95238 --- /dev/null +++ b/docs/my-website/docs/tutorials/agentfield.md @@ -0,0 +1,124 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# AgentField with LiteLLM + +Use [AgentField](https://agentfield.ai) with any LLM provider through LiteLLM. + +AgentField is an open-source control plane for building and orchestrating autonomous AI agents, with SDKs for Python, TypeScript, and Go. AgentField's Python SDK uses LiteLLM internally for multi-provider LLM support. + +## Overview + +AgentField's Python SDK uses `litellm.acompletion()` under the hood, giving you access to 100+ LLM providers out of the box: + +- Use any LiteLLM-supported model (OpenAI, Anthropic, Azure, Bedrock, Ollama, etc.) 
+- Switch between providers by changing the model string +- All LiteLLM features (caching, fallbacks, routing) work automatically + +## Prerequisites + +- Python 3.9+ +- API keys for your LLM providers +- AgentField control plane (optional, for orchestration features) + +## Installation + +```bash +pip install agentfield +``` + +## Quick Start + +### Basic Agent with OpenAI + +```python +from agentfield import Agent, AgentConfig + +config = AgentConfig( + name="my-agent", + model="gpt-4o", # Any LiteLLM-supported model + instructions="You are a helpful assistant." +) + +agent = Agent(config) +response = await agent.run("Hello, world!") +``` + +### Using Anthropic + +```python +config = AgentConfig( + name="claude-agent", + model="anthropic/claude-sonnet-4-20250514", # LiteLLM model format + instructions="You are a helpful assistant." +) +``` + +### Using Ollama (Local Models) + +```python +config = AgentConfig( + name="local-agent", + model="ollama/llama3.1", # LiteLLM's ollama/ prefix + instructions="You are a helpful assistant." +) +``` + +### Using Azure OpenAI + +```python +config = AgentConfig( + name="azure-agent", + model="azure/gpt-4o", # LiteLLM's azure/ prefix + instructions="You are a helpful assistant." +) +``` + +### Using with LiteLLM Proxy + +Point AgentField to a LiteLLM Proxy for centralized model management: + +```python +import os + +os.environ["OPENAI_API_BASE"] = "http://0.0.0.0:4000" # LiteLLM Proxy URL +os.environ["OPENAI_API_KEY"] = "sk-1234" # LiteLLM Proxy key + +config = AgentConfig( + name="proxy-agent", + model="gpt-4o", # Virtual model name from proxy config + instructions="You are a helpful assistant." 
+) +``` + +## Multi-Agent Orchestration + +AgentField's control plane orchestrates multiple agents, each potentially using different LLM providers: + +```python +from agentfield import Agent, AgentConfig, ControlPlane + +# Create agents with different providers +researcher = Agent(AgentConfig( + name="researcher", + model="anthropic/claude-sonnet-4-20250514", + instructions="You research topics thoroughly." +)) + +writer = Agent(AgentConfig( + name="writer", + model="gpt-4o", + instructions="You write clear, concise content." +)) + +# Register with control plane +cp = ControlPlane(server="http://localhost:8080") +cp.register(researcher) +cp.register(writer) +``` + +## Links + +- [Documentation](https://agentfield.ai/docs) +- [GitHub](https://github.com/Agent-Field/agentfield) +- [Python SDK](https://github.com/Agent-Field/agentfield/tree/main/sdk/python) diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index b4a1337d54e..505345c90ea 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -185,6 +185,7 @@ const sidebars = { slug: "/agent_sdks" }, items: [ + "tutorials/agentfield", "tutorials/openai_agents_sdk", "tutorials/claude_agent_sdk", "tutorials/copilotkit_sdk", diff --git a/litellm/__init__.py b/litellm/__init__.py index 4fc71e12700..91122681478 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -1452,6 +1452,7 @@ def set_global_gitlab_config(config: Dict[str, Any]) -> None: from .llms.perplexity.responses.transformation import PerplexityResponsesConfig as PerplexityResponsesConfig from .llms.databricks.responses.transformation import DatabricksResponsesAPIConfig as DatabricksResponsesAPIConfig from .llms.openrouter.responses.transformation import OpenRouterResponsesAPIConfig as OpenRouterResponsesAPIConfig + from .llms.ovhcloud.responses.transformation import OVHCloudResponsesAPIConfig as OVHCloudResponsesAPIConfig from .llms.gemini.interactions.transformation import GoogleAIStudioInteractionsConfig as 
GoogleAIStudioInteractionsConfig from .llms.openai.chat.o_series_transformation import OpenAIOSeriesConfig as OpenAIOSeriesConfig, OpenAIOSeriesConfig as OpenAIO1Config from .llms.anthropic.skills.transformation import AnthropicSkillsConfig as AnthropicSkillsConfig diff --git a/litellm/_lazy_imports_registry.py b/litellm/_lazy_imports_registry.py index 9e0453102d0..d24f4bd3ac3 100644 --- a/litellm/_lazy_imports_registry.py +++ b/litellm/_lazy_imports_registry.py @@ -233,6 +233,7 @@ "PerplexityResponsesConfig", "DatabricksResponsesAPIConfig", "OpenRouterResponsesAPIConfig", + "OVHCloudResponsesAPIConfig", "GoogleAIStudioInteractionsConfig", "OpenAIOSeriesConfig", "AnthropicSkillsConfig", @@ -930,6 +931,10 @@ ".llms.openrouter.responses.transformation", "OpenRouterResponsesAPIConfig", ), + "OVHCloudResponsesAPIConfig": ( + ".llms.ovhcloud.responses.transformation", + "OVHCloudResponsesAPIConfig", + ), "GoogleAIStudioInteractionsConfig": ( ".llms.gemini.interactions.transformation", "GoogleAIStudioInteractionsConfig", diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py index 75d45af86e6..b886dd9f841 100644 --- a/litellm/cost_calculator.py +++ b/litellm/cost_calculator.py @@ -1095,6 +1095,19 @@ def completion_cost( # noqa: PLR0915 router_model_id=router_model_id, ) + # When base_model overrides model and carries its own provider prefix + # (e.g. base_model="gemini/gemini-2.0-flash" on an anthropic deployment), + # align custom_llm_provider so cost_per_token builds the correct key. + # Skip when custom_pricing is True (base_model is ignored in that path). 
+ _provider_overridden = False + if base_model is not None and selected_model is not None and not custom_pricing: + _parts = selected_model.split("/", 1) + if len(_parts) > 1 and _parts[0] in LlmProvidersSet: + extracted = _parts[0] + if extracted != custom_llm_provider: + custom_llm_provider = extracted + _provider_overridden = True + potential_model_names = [ selected_model, _get_response_model(completion_response), @@ -1176,9 +1189,10 @@ def completion_cost( # noqa: PLR0915 hidden_params = getattr(completion_response, "_hidden_params", None) if hidden_params is not None: - custom_llm_provider = hidden_params.get( - "custom_llm_provider", custom_llm_provider or None - ) + if not _provider_overridden: + custom_llm_provider = hidden_params.get( + "custom_llm_provider", custom_llm_provider or None + ) region_name = hidden_params.get("region_name", region_name) # For Gemini/Vertex AI responses, trafficType is stored in diff --git a/litellm/litellm_core_utils/audio_utils/utils.py b/litellm/litellm_core_utils/audio_utils/utils.py index a7d12841e58..8b59f1948b2 100644 --- a/litellm/litellm_core_utils/audio_utils/utils.py +++ b/litellm/litellm_core_utils/audio_utils/utils.py @@ -263,7 +263,16 @@ def calculate_request_duration(file: FileTypes) -> Optional[float]: # Extract duration using soundfile file_object = io.BytesIO(file_content) with sf.SoundFile(file_object) as audio: - duration = len(audio) / audio.samplerate + frames = len(audio) + # Guard against sentinel/invalid frame counts (e.g., 2^63-1 from libsndfile) + if frames <= 0 or frames >= 2**63 - 1: + return None + if audio.samplerate <= 0: + return None + duration = frames / audio.samplerate + # Reject implausible durations (> 24 hours) + if duration > 86400: + return None return duration except Exception: diff --git a/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py b/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py index 4bc9f0c835a..4188a91eac8 100644 --- 
a/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py +++ b/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py @@ -46,6 +46,20 @@ from .get_headers import get_response_headers +def _normalize_images( + images: Optional[List[Dict[str, object]]], +) -> Optional[List[Dict[str, object]]]: + """Normalize image items to include required 'index' field if missing.""" + if images is None: + return None + normalized: List[Dict[str, object]] = [] + for i, img in enumerate(images): + if isinstance(img, dict) and "index" not in img: + img = {**img, "index": i} + normalized.append(img) + return normalized + + _MESSAGE_FIELDS: frozenset = frozenset(Message.model_fields.keys()) _CHOICES_FIELDS: frozenset = frozenset(Choices.model_fields.keys()) _MODEL_RESPONSE_FIELDS: frozenset = frozenset(ModelResponse.model_fields.keys()) | { diff --git a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py index 5b215c1fe54..aa73ab18f90 100644 --- a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py +++ b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py @@ -24,6 +24,10 @@ from litellm.types.router import GenericLiteLLMParams from litellm.utils import ProviderConfigManager, client +from litellm.litellm_core_utils.prompt_templates.common_utils import ( + DEFAULT_ASSISTANT_CONTINUE_MESSAGE, +) + from ..adapters.handler import LiteLLMMessagesToCompletionTransformationHandler from ..responses_adapters.handler import LiteLLMMessagesToResponsesAPIHandler from .utils import AnthropicMessagesRequestUtils, mock_response @@ -49,6 +53,58 @@ def _should_route_to_responses_api(custom_llm_provider: Optional[str]) -> bool: ################################################# +def _sanitize_anthropic_messages(messages: List[Dict]) -> List[Dict]: + """ + Sanitize messages for the /v1/messages endpoint. 
+ + The Anthropic API can return assistant messages with empty text blocks + alongside tool_use blocks (e.g., {"type": "text", "text": ""}). While + the API returns these, it rejects them when sent back in subsequent + requests with "text content blocks must be non-empty". + + This is particularly common in multi-turn tool-use conversations (e.g., + Claude Code / Agent SDK) where the model starts a text block but + immediately switches to a tool_use block. + + The /v1/chat/completions path already handles this via + process_empty_text_blocks() in factory.py, but the /v1/messages path + was missing sanitization. + """ + for i, message in enumerate(messages): + content = message.get("content") + if not isinstance(content, list): + continue + + # Filter out empty text blocks, keeping non-empty text and other types. + # Use `(... or "")` to guard against None text values. + filtered = [ + block + for block in content + if not ( + isinstance(block, dict) + and block.get("type") == "text" + and not (block.get("text") or "").strip() + ) + ] + + # Only update if we actually removed something. + # Avoid mutating the caller's dicts — create a shallow copy. + if len(filtered) < len(content): + if len(filtered) > 0: + messages[i] = {**message, "content": filtered} + else: + # All blocks were empty text — replace with a continuation + # message rather than leaving empty blocks that trigger 400 + # errors. Matches behavior of process_empty_text_blocks() + # in factory.py. + messages[i] = { + **message, + "content": [{"type": "text", "text": DEFAULT_ASSISTANT_CONTINUE_MESSAGE.get("content", "Please continue.")}], + } + + return messages + + async def _execute_pre_request_hooks( model: str, messages: List[Dict], @@ -137,6 +193,10 @@ async def anthropic_messages( """ Async: Make llm api request in Anthropic /messages API spec """ + # Sanitize empty text blocks from messages before processing. 
+ # See: https://github.com/BerriAI/litellm/issues/22930 + messages = _sanitize_anthropic_messages(messages) + # Execute pre-request hooks to allow CustomLoggers to modify request request_kwargs = await _execute_pre_request_hooks( model=model, diff --git a/litellm/llms/ollama/completion/transformation.py b/litellm/llms/ollama/completion/transformation.py index ed14b6a3318..1bb29e56b88 100644 --- a/litellm/llms/ollama/completion/transformation.py +++ b/litellm/llms/ollama/completion/transformation.py @@ -238,6 +238,12 @@ def get_model_info( or get_secret_str("OLLAMA_API_BASE") or "http://localhost:11434" ) + # Strip any endpoint paths that may have been appended by get_complete_url() + # to avoid malformed URLs like /api/generate/api/show + for endpoint in ["/api/generate", "/api/chat", "/api/embed"]: + if api_base.endswith(endpoint): + api_base = api_base[: -len(endpoint)] + break api_key = self.get_api_key() headers = {"Authorization": f"Bearer {api_key}"} if api_key else {} diff --git a/litellm/llms/ovhcloud/responses/__init__.py b/litellm/llms/ovhcloud/responses/__init__.py new file mode 100644 index 00000000000..536165166d6 --- /dev/null +++ b/litellm/llms/ovhcloud/responses/__init__.py @@ -0,0 +1 @@ +"""OVHCloud Responses API support""" diff --git a/litellm/llms/ovhcloud/responses/transformation.py b/litellm/llms/ovhcloud/responses/transformation.py new file mode 100644 index 00000000000..1b2413874d9 --- /dev/null +++ b/litellm/llms/ovhcloud/responses/transformation.py @@ -0,0 +1,113 @@ +""" +Support for OVHcloud AI Endpoints `/v1/responses` endpoint. + +Our unified API follows the OpenAI standard. 
+More information on our website: https://oai.endpoints.kepler.ai.cloud.ovh.net/doc/gpt-oss-20b/openapi.json +""" +from typing import Optional +import litellm +from litellm._logging import verbose_logger +from litellm.llms.openai.responses.transformation import OpenAIResponsesAPIConfig +from litellm.secret_managers.main import get_secret_str +from litellm.types.router import GenericLiteLLMParams +from litellm.types.utils import LlmProviders +from litellm.utils import get_model_info + +class OVHCloudResponsesAPIConfig(OpenAIResponsesAPIConfig): + """ + Configuration for OVHCloud AI Endpoints Responses API. + + Inherits from OpenAIResponsesAPIConfig since OVHCloud's Responses API follows + the OpenAI specification. + + Reference: https://oai.endpoints.kepler.ai.cloud.ovh.net/doc/gpt-oss-20b/openapi.json + """ + + @property + def custom_llm_provider(self) -> LlmProviders: + return LlmProviders.OVHCLOUD + + def get_supported_openai_params(self, model: str) -> list: + """ + Get supported OpenAI params, filtering tool-related params for models + that don't support function calling. 
+ + Details about function calling support can be found here: + https://help.ovhcloud.com/csm/en-gb-public-cloud-ai-endpoints-function-calling?id=kb_article_view&sysparm_article=KB0071907 + """ + supported_params = super().get_supported_openai_params(model) + + supports_function_calling: Optional[bool] = None + try: + model_info = get_model_info(model, custom_llm_provider="ovhcloud") + supports_function_calling = model_info.get( + "supports_function_calling", False + ) + except Exception as e: + verbose_logger.debug(f"Error getting supported OpenAI params: {e}") + pass + + if supports_function_calling is not True: + verbose_logger.debug( + "You can see our models supporting function_calling in our catalog: https://www.ovhcloud.com/en/public-cloud/ai-endpoints/catalog/ " + ) + # Remove tool-related params for models that don't support function calling + for param in ("tools", "tool_choice"): + if param in supported_params: + supported_params.remove(param) + + return supported_params + + def validate_environment( + self, headers: dict, model: str, litellm_params: Optional[GenericLiteLLMParams] + ) -> dict: + """ + Validate environment and set up headers for OVHCloud API. + + Uses OVHCLOUD_API_KEY from environment or litellm_params. + """ + litellm_params = litellm_params or GenericLiteLLMParams() + api_key = ( + litellm_params.api_key + or litellm.api_key + or litellm.ovhcloud_key + or get_secret_str("OVHCLOUD_API_KEY") + ) + + if not api_key: + raise ValueError( + "OVHcloud AI Endpoints API key is required. Set OVHCLOUD_API_KEY environment variable or pass api_key parameter." + ) + + headers.update( + { + "Authorization": f"Bearer {api_key}", + } + ) + return headers + + def get_complete_url( + self, + api_base: Optional[str], + litellm_params: dict, + ) -> str: + """ + Get the complete URL for OVHcloud AI Endpoints Responses API endpoint. 
+ + Returns: + str: The full URL for the OVHcloud AI Endpoints /v1/responses endpoint + """ + api_base = ( + api_base + or litellm.api_base + or get_secret_str("OVHCLOUD_API_BASE") + or "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1" + ) + + # Remove trailing slashes + api_base = api_base.rstrip("/") + + # Avoid double-appending /responses + if not api_base.endswith("/responses"): + return f"{api_base}/responses" + return api_base diff --git a/litellm/utils.py b/litellm/utils.py index b7caf0edd7e..bffc069aeaf 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -4672,6 +4672,10 @@ def add_provider_specific_params_to_optional_params( in ["openai", "azure", "text-completion-openai"] + litellm.openai_compatible_providers ): + litellm_internal_passthrough_blocklist = { + "vector_store_id", + "vector_store_ids", + } # for openai, azure we should pass the extra/passed params within `extra_body` https://github.com/openai/openai-python/blob/ac33853ba10d13ac149b1fa3ca6dba7d613065c9/src/openai/resources/models.py#L46 if ( _should_drop_param( @@ -4681,7 +4685,11 @@ ): extra_body = passed_params.pop("extra_body", None) or {} for k in passed_params.keys(): - if k not in openai_params and passed_params[k] is not None: + if ( + k not in openai_params + and k not in litellm_internal_passthrough_blocklist + and passed_params[k] is not None + ):
extra_body[k] = passed_params[k] if not isinstance(optional_params.get("extra_body"), dict): optional_params["extra_body"] = {} @@ -8362,6 +8370,8 @@ def get_provider_responses_api_config( return litellm.OpenRouterResponsesAPIConfig() elif litellm.LlmProviders.HOSTED_VLLM == provider: return litellm.HostedVLLMResponsesAPIConfig() + elif litellm.LlmProviders.OVHCLOUD == provider: + return litellm.OVHCloudResponsesAPIConfig() return None @staticmethod diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 194af4895fe..4ae7c25e0b1 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -27832,208 +27832,6 @@ "max_tokens": 128000, "mode": "chat" }, - "ovhcloud/DeepSeek-R1-Distill-Llama-70B": { - "input_cost_per_token": 6.7e-07, - "litellm_provider": "ovhcloud", - "max_input_tokens": 131000, - "max_output_tokens": 131000, - "max_tokens": 131000, - "mode": "chat", - "output_cost_per_token": 6.7e-07, - "source": "https://endpoints.ai.cloud.ovh.net/models/deepseek-r1-distill-llama-70b", - "supports_function_calling": true, - "supports_reasoning": true, - "supports_response_schema": true, - "supports_tool_choice": true - }, - "ovhcloud/Llama-3.1-8B-Instruct": { - "input_cost_per_token": 1e-07, - "litellm_provider": "ovhcloud", - "max_input_tokens": 131000, - "max_output_tokens": 131000, - "max_tokens": 131000, - "mode": "chat", - "output_cost_per_token": 1e-07, - "source": "https://endpoints.ai.cloud.ovh.net/models/llama-3-1-8b-instruct", - "supports_function_calling": true, - "supports_response_schema": true, - "supports_tool_choice": true - }, - "ovhcloud/Meta-Llama-3_1-70B-Instruct": { - "input_cost_per_token": 6.7e-07, - "litellm_provider": "ovhcloud", - "max_input_tokens": 131000, - "max_output_tokens": 131000, - "max_tokens": 131000, - "mode": "chat", - "output_cost_per_token": 6.7e-07, - "source": "https://endpoints.ai.cloud.ovh.net/models/meta-llama-3-1-70b-instruct", - 
"supports_function_calling": false, - "supports_response_schema": false, - "supports_tool_choice": false - }, - "ovhcloud/Meta-Llama-3_3-70B-Instruct": { - "input_cost_per_token": 6.7e-07, - "litellm_provider": "ovhcloud", - "max_input_tokens": 131000, - "max_output_tokens": 131000, - "max_tokens": 131000, - "mode": "chat", - "output_cost_per_token": 6.7e-07, - "source": "https://endpoints.ai.cloud.ovh.net/models/meta-llama-3-3-70b-instruct", - "supports_function_calling": true, - "supports_response_schema": true, - "supports_tool_choice": true - }, - "ovhcloud/Mistral-7B-Instruct-v0.3": { - "input_cost_per_token": 1e-07, - "litellm_provider": "ovhcloud", - "max_input_tokens": 127000, - "max_output_tokens": 127000, - "max_tokens": 127000, - "mode": "chat", - "output_cost_per_token": 1e-07, - "source": "https://endpoints.ai.cloud.ovh.net/models/mistral-7b-instruct-v0-3", - "supports_function_calling": true, - "supports_response_schema": true, - "supports_tool_choice": true - }, - "ovhcloud/Mistral-Nemo-Instruct-2407": { - "input_cost_per_token": 1.3e-07, - "litellm_provider": "ovhcloud", - "max_input_tokens": 118000, - "max_output_tokens": 118000, - "max_tokens": 118000, - "mode": "chat", - "output_cost_per_token": 1.3e-07, - "source": "https://endpoints.ai.cloud.ovh.net/models/mistral-nemo-instruct-2407", - "supports_function_calling": true, - "supports_response_schema": true, - "supports_tool_choice": true - }, - "ovhcloud/Mistral-Small-3.2-24B-Instruct-2506": { - "input_cost_per_token": 9e-08, - "litellm_provider": "ovhcloud", - "max_input_tokens": 128000, - "max_output_tokens": 128000, - "max_tokens": 128000, - "mode": "chat", - "output_cost_per_token": 2.8e-07, - "source": "https://endpoints.ai.cloud.ovh.net/models/mistral-small-3-2-24b-instruct-2506", - "supports_function_calling": true, - "supports_response_schema": true, - "supports_tool_choice": true, - "supports_vision": true - }, - "ovhcloud/Mixtral-8x7B-Instruct-v0.1": { - "input_cost_per_token": 
6.3e-07, - "litellm_provider": "ovhcloud", - "max_input_tokens": 32000, - "max_output_tokens": 32000, - "max_tokens": 32000, - "mode": "chat", - "output_cost_per_token": 6.3e-07, - "source": "https://endpoints.ai.cloud.ovh.net/models/mixtral-8x7b-instruct-v0-1", - "supports_function_calling": false, - "supports_response_schema": true, - "supports_tool_choice": false - }, - "ovhcloud/Qwen2.5-Coder-32B-Instruct": { - "input_cost_per_token": 8.7e-07, - "litellm_provider": "ovhcloud", - "max_input_tokens": 32000, - "max_output_tokens": 32000, - "max_tokens": 32000, - "mode": "chat", - "output_cost_per_token": 8.7e-07, - "source": "https://endpoints.ai.cloud.ovh.net/models/qwen2-5-coder-32b-instruct", - "supports_function_calling": false, - "supports_response_schema": true, - "supports_tool_choice": false - }, - "ovhcloud/Qwen2.5-VL-72B-Instruct": { - "input_cost_per_token": 9.1e-07, - "litellm_provider": "ovhcloud", - "max_input_tokens": 32000, - "max_output_tokens": 32000, - "max_tokens": 32000, - "mode": "chat", - "output_cost_per_token": 9.1e-07, - "source": "https://endpoints.ai.cloud.ovh.net/models/qwen2-5-vl-72b-instruct", - "supports_function_calling": false, - "supports_response_schema": true, - "supports_tool_choice": false, - "supports_vision": true - }, - "ovhcloud/Qwen3-32B": { - "input_cost_per_token": 8e-08, - "litellm_provider": "ovhcloud", - "max_input_tokens": 32000, - "max_output_tokens": 32000, - "max_tokens": 32000, - "mode": "chat", - "output_cost_per_token": 2.3e-07, - "source": "https://endpoints.ai.cloud.ovh.net/models/qwen3-32b", - "supports_function_calling": true, - "supports_reasoning": true, - "supports_response_schema": true, - "supports_tool_choice": true - }, - "ovhcloud/gpt-oss-120b": { - "input_cost_per_token": 8e-08, - "litellm_provider": "ovhcloud", - "max_input_tokens": 131000, - "max_output_tokens": 131000, - "max_tokens": 131000, - "mode": "chat", - "output_cost_per_token": 4e-07, - "source": 
"https://endpoints.ai.cloud.ovh.net/models/gpt-oss-120b", - "supports_function_calling": false, - "supports_reasoning": true, - "supports_response_schema": true, - "supports_tool_choice": false - }, - "ovhcloud/gpt-oss-20b": { - "input_cost_per_token": 4e-08, - "litellm_provider": "ovhcloud", - "max_input_tokens": 131000, - "max_output_tokens": 131000, - "max_tokens": 131000, - "mode": "chat", - "output_cost_per_token": 1.5e-07, - "source": "https://endpoints.ai.cloud.ovh.net/models/gpt-oss-20b", - "supports_function_calling": false, - "supports_reasoning": true, - "supports_response_schema": true, - "supports_tool_choice": false - }, - "ovhcloud/llava-v1.6-mistral-7b-hf": { - "input_cost_per_token": 2.9e-07, - "litellm_provider": "ovhcloud", - "max_input_tokens": 32000, - "max_output_tokens": 32000, - "max_tokens": 32000, - "mode": "chat", - "output_cost_per_token": 2.9e-07, - "source": "https://endpoints.ai.cloud.ovh.net/models/llava-next-mistral-7b", - "supports_function_calling": false, - "supports_response_schema": true, - "supports_tool_choice": false, - "supports_vision": true - }, - "ovhcloud/mamba-codestral-7B-v0.1": { - "input_cost_per_token": 1.9e-07, - "litellm_provider": "ovhcloud", - "max_input_tokens": 256000, - "max_output_tokens": 256000, - "max_tokens": 256000, - "mode": "chat", - "output_cost_per_token": 1.9e-07, - "source": "https://endpoints.ai.cloud.ovh.net/models/mamba-codestral-7b-v0-1", - "supports_function_calling": false, - "supports_response_schema": true, - "supports_tool_choice": false - }, "palm/chat-bison": { "input_cost_per_token": 1.25e-07, "litellm_provider": "palm", @@ -39815,5 +39613,170 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true + }, + "ovhcloud/Qwen3Guard-Gen-8B": { + "litellm_provider": "ovhcloud", + "mode": "chat", + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768 + }, + "ovhcloud/Qwen3Guard-Gen-0.6B": { + "litellm_provider": "ovhcloud", + 
"mode": "chat", + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768 + }, + "ovhcloud/Meta-Llama-3_3-70B-Instruct": { + "litellm_provider": "ovhcloud", + "mode": "chat", + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 7.4e-07, + "output_cost_per_token": 7.4e-07, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "ovhcloud/Llama-3.1-8B-Instruct": { + "litellm_provider": "ovhcloud", + "mode": "chat", + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1.1e-07, + "output_cost_per_token": 1.1e-07, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "ovhcloud/Qwen2.5-VL-72B-Instruct": { + "litellm_provider": "ovhcloud", + "mode": "chat", + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1.01e-06, + "output_cost_per_token": 1.01e-06, + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false, + "supports_vision": true + }, + "ovhcloud/Qwen3-Coder-30B-A3B-Instruct": { + "litellm_provider": "ovhcloud", + "mode": "chat", + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 7e-08, + "output_cost_per_token": 2.6e-07, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_reasoning": false + }, + "ovhcloud/Mistral-Small-3.2-24B-Instruct-2506": { + "litellm_provider": "ovhcloud", + "mode": "chat", + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 3.1e-07, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + 
"ovhcloud/Mistral-Nemo-Instruct-2407": { + "litellm_provider": "ovhcloud", + "mode": "chat", + "max_tokens": 65536, + "max_input_tokens": 65536, + "max_output_tokens": 65536, + "input_cost_per_token": 1.4e-07, + "output_cost_per_token": 1.4e-07, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "ovhcloud/Mixtral-8x7B-Instruct-v0.1": { + "litellm_provider": "ovhcloud", + "mode": "chat", + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 7e-07, + "output_cost_per_token": 7e-07, + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false + }, + "ovhcloud/Qwen3-32B": { + "litellm_provider": "ovhcloud", + "mode": "chat", + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 9e-08, + "output_cost_per_token": 2.5e-07, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_reasoning": true + }, + "ovhcloud/DeepSeek-R1-Distill-Llama-70B": { + "litellm_provider": "ovhcloud", + "mode": "chat", + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 7.4e-07, + "output_cost_per_token": 7.4e-07, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_reasoning": true + }, + "ovhcloud/gpt-oss-20b": { + "litellm_provider": "ovhcloud", + "mode": "chat", + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 5e-08, + "output_cost_per_token": 1.8e-07, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_reasoning": true + }, + "ovhcloud/Mistral-7B-Instruct-v0.3": { + "litellm_provider": "ovhcloud", + "mode": "chat", + "max_tokens": 65536, + "max_input_tokens": 65536, + 
"max_output_tokens": 65536, + "input_cost_per_token": 1.1e-07, + "output_cost_per_token": 1.1e-07, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "ovhcloud/gpt-oss-120b": { + "litellm_provider": "ovhcloud", + "mode": "chat", + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-08, + "output_cost_per_token": 4.7e-07, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_reasoning": true + } } diff --git a/tests/local_testing/test_completion_cost.py b/tests/local_testing/test_completion_cost.py index 2f78f27361e..0485b2187d4 100644 --- a/tests/local_testing/test_completion_cost.py +++ b/tests/local_testing/test_completion_cost.py @@ -2945,3 +2945,77 @@ def test_batch_cost_calculator(): cost = completion_cost(**args) assert cost > 0 + + +def test_cost_calculator_base_model_cross_provider(): + """ + When base_model has a different provider prefix than the deployment, + custom_llm_provider should be updated so cost_per_token builds the + correct model key. Regression test for #22257. + """ + resp = litellm.completion( + model="anthropic/my-custom-deployment", + messages=[{"role": "user", "content": "Hello"}], + base_model="gemini/gemini-2.0-flash", + mock_response="Hi there!", + ) + assert resp._hidden_params["response_cost"] > 0 + + +def test_cost_calculator_base_model_cross_provider_direct(): + """ + Direct completion_cost unit test for cross-provider base_model override. + Verifies that completion_cost correctly routes to the base_model provider.
+ """ + from litellm import ModelResponse, Usage + + resp = ModelResponse( + id="chatcmpl-test", + model="gemini/gemini-2.0-flash", + usage=Usage(prompt_tokens=10, completion_tokens=20, total_tokens=30), + ) + cost = completion_cost( + model="anthropic/my-custom-deployment", + completion_response=resp, + base_model="gemini/gemini-2.0-flash", + custom_llm_provider="anthropic", + ) + assert cost > 0 + + +def test_cost_calculator_base_model_cross_provider_hidden_params_guard(): + """ + Verify that hidden_params.custom_llm_provider does not undo the + base_model provider override when the response carries a stale provider. + """ + from litellm import ModelResponse, Usage + + resp = ModelResponse( + id="chatcmpl-guard", + model="gemini/gemini-2.0-flash", + usage=Usage(prompt_tokens=10, completion_tokens=20, total_tokens=30), + ) + # Simulate hidden_params carrying the original (wrong) provider + resp._hidden_params = {"custom_llm_provider": "anthropic"} + + cost = completion_cost( + model="anthropic/my-custom-deployment", + completion_response=resp, + base_model="gemini/gemini-2.0-flash", + custom_llm_provider="anthropic", + ) + assert cost > 0 + + +def test_cost_calculator_base_model_same_provider_no_regression(): + """ + When base_model has the same provider prefix as the deployment, + custom_llm_provider should remain unchanged (no-regression). 
class TestSanitizeAnthropicMessages:
    """Behavioral tests for _sanitize_anthropic_messages (empty text-block stripping)."""

    def test_strips_empty_text_block_alongside_tool_use(self):
        """Typical case: the model emits an empty text block next to a tool_use."""
        from litellm.llms.anthropic.experimental_pass_through.messages.handler import (
            _sanitize_anthropic_messages,
        )

        conversation = [
            {"role": "user", "content": "Use the bash tool to list files"},
            {
                "role": "assistant",
                "content": [
                    {"type": "text", "text": ""},
                    {"type": "tool_use", "id": "toolu_123", "name": "Bash", "input": {"cmd": "ls"}},
                ],
            },
        ]
        sanitized_assistant = _sanitize_anthropic_messages(conversation)[1]
        assert [block["type"] for block in sanitized_assistant["content"]] == ["tool_use"]

    def test_strips_whitespace_only_text_block(self):
        """Whitespace-only text counts as empty and is removed."""
        from litellm.llms.anthropic.experimental_pass_through.messages.handler import (
            _sanitize_anthropic_messages,
        )

        conversation = [
            {
                "role": "assistant",
                "content": [
                    {"type": "text", "text": " \n "},
                    {"type": "tool_use", "id": "toolu_123", "name": "Bash", "input": {}},
                ],
            },
        ]
        sanitized = _sanitize_anthropic_messages(conversation)[0]
        assert len(sanitized["content"]) == 1
        assert sanitized["content"][0]["type"] == "tool_use"

    def test_preserves_non_empty_text_blocks(self):
        """Real text next to a tool_use must survive sanitization."""
        from litellm.llms.anthropic.experimental_pass_through.messages.handler import (
            _sanitize_anthropic_messages,
        )

        conversation = [
            {
                "role": "assistant",
                "content": [
                    {"type": "text", "text": "I'll run that for you."},
                    {"type": "tool_use", "id": "toolu_123", "name": "Bash", "input": {}},
                ],
            },
        ]
        sanitized = _sanitize_anthropic_messages(conversation)[0]
        assert len(sanitized["content"]) == 2

    def test_replaces_all_empty_blocks_with_continuation(self):
        """If every block is empty text, a continuation message takes their place."""
        from litellm.litellm_core_utils.prompt_templates.common_utils import (
            DEFAULT_ASSISTANT_CONTINUE_MESSAGE,
        )
        from litellm.llms.anthropic.experimental_pass_through.messages.handler import (
            _sanitize_anthropic_messages,
        )

        conversation = [
            {"role": "assistant", "content": [{"type": "text", "text": ""}]},
        ]
        sanitized = _sanitize_anthropic_messages(conversation)[0]
        expected_text = DEFAULT_ASSISTANT_CONTINUE_MESSAGE.get("content", "Please continue.")
        assert len(sanitized["content"]) == 1
        assert sanitized["content"][0]["type"] == "text"
        assert sanitized["content"][0]["text"] == expected_text

    def test_handles_string_content(self):
        """Plain string content (not a list) passes through untouched."""
        from litellm.llms.anthropic.experimental_pass_through.messages.handler import (
            _sanitize_anthropic_messages,
        )

        sanitized = _sanitize_anthropic_messages([{"role": "user", "content": "Hello"}])
        assert sanitized[0]["content"] == "Hello"

    def test_handles_user_messages_too(self):
        """User messages with content lists are sanitized the same way."""
        from litellm.llms.anthropic.experimental_pass_through.messages.handler import (
            _sanitize_anthropic_messages,
        )

        conversation = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": ""},
                    {"type": "tool_result", "tool_use_id": "toolu_123", "content": "file1.txt"},
                ],
            },
        ]
        sanitized = _sanitize_anthropic_messages(conversation)[0]
        assert len(sanitized["content"]) == 1
        assert sanitized["content"][0]["type"] == "tool_result"

    def test_handles_none_text_value(self):
        """A None text value is treated as empty rather than raising."""
        from litellm.llms.anthropic.experimental_pass_through.messages.handler import (
            _sanitize_anthropic_messages,
        )

        conversation = [
            {
                "role": "assistant",
                "content": [
                    {"type": "text", "text": None},
                    {"type": "tool_use", "id": "toolu_123", "name": "Bash", "input": {}},
                ],
            },
        ]
        sanitized = _sanitize_anthropic_messages(conversation)[0]
        assert len(sanitized["content"]) == 1
        assert sanitized["content"][0]["type"] == "tool_use"

    def test_does_not_mutate_original_message(self):
        """Sanitization copies messages; the caller's content list is untouched."""
        from litellm.llms.anthropic.experimental_pass_through.messages.handler import (
            _sanitize_anthropic_messages,
        )

        original_content = [
            {"type": "text", "text": ""},
            {"type": "tool_use", "id": "toolu_123", "name": "Bash", "input": {}},
        ]
        sanitized = _sanitize_anthropic_messages(
            [{"role": "assistant", "content": original_content}]
        )
        # The caller's list keeps both blocks ...
        assert len(original_content) == 2
        # ... while the sanitized copy dropped the empty text block.
        assert len(sanitized[0]["content"]) == 1
a/tests/test_litellm/llms/ollama/test_ollama_model_info.py +++ b/tests/test_litellm/llms/ollama/test_ollama_model_info.py @@ -219,6 +219,36 @@ def mock_post(url, json, headers=None): config.get_model_info("ollama_chat/llama3", api_base="http://localhost:11434") assert captured_json[1]["name"] == "llama3" + def test_get_model_info_strips_endpoint_paths_from_api_base(self, monkeypatch): + """When api_base contains endpoint paths like /api/generate, they should be stripped before appending /api/show.""" + from litellm.llms.ollama.completion.transformation import OllamaConfig + + captured_urls = [] + + def mock_post(url, json, headers=None): + captured_urls.append(url) + return DummyResponse({"template": "", "model_info": {}}, status_code=200) + + monkeypatch.setattr("litellm.module_level_client.post", mock_post) + + config = OllamaConfig() + + # Test with /api/generate endpoint already appended + config.get_model_info("llama3", api_base="http://my-server:11434/api/generate") + assert captured_urls[0] == "http://my-server:11434/api/show" + + # Test with /api/chat endpoint already appended + config.get_model_info("llama3", api_base="http://my-server:11434/api/chat") + assert captured_urls[1] == "http://my-server:11434/api/show" + + # Test with /api/embed endpoint already appended + config.get_model_info("llama3", api_base="http://my-server:11434/api/embed") + assert captured_urls[2] == "http://my-server:11434/api/show" + + # Test with clean base URL (should still work) + config.get_model_info("llama3", api_base="http://my-server:11434") + assert captured_urls[3] == "http://my-server:11434/api/show" + class TestOllamaAuthHeaders: """Tests for Ollama authentication header handling in completion calls.""" diff --git a/tests/test_litellm/llms/ovhcloud/responses/test_ovhcloud_responses_transformation.py b/tests/test_litellm/llms/ovhcloud/responses/test_ovhcloud_responses_transformation.py new file mode 100644 index 00000000000..063869210f4 --- /dev/null +++ 
class TestOVHCloudResponsesAPITransformation:
    """Test OVHCloud Responses API configuration and transformations."""

    def test_ovhcloud_provider_config_registration(self):
        """The OVHCloud provider must resolve to OVHCloudResponsesAPIConfig."""
        resolved = ProviderConfigManager.get_provider_responses_api_config(
            model="ovhcloud/gpt-oss-120b",
            provider=LlmProviders.OVHCLOUD,
        )

        assert resolved is not None, "Config should not be None for OVHCloud provider"
        assert isinstance(
            resolved, OVHCloudResponsesAPIConfig
        ), f"Expected OVHCloudResponsesAPIConfig, got {type(resolved)}"
        assert (
            resolved.custom_llm_provider == LlmProviders.OVHCLOUD
        ), "custom_llm_provider should be OVHCLOUD"

    def test_ovhcloud_responses_endpoint_url(self):
        """get_complete_url must point at the /responses endpoint for all api_base shapes."""
        cfg = OVHCloudResponsesAPIConfig()

        # No api_base -> default OVHCloud endpoint.
        default_url = cfg.get_complete_url(api_base=None, litellm_params={})
        assert default_url == "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1/responses", f"Expected OVHCloud responses endpoint, got {default_url}"

        # Custom api_base.
        custom_url = cfg.get_complete_url(
            api_base="https://custom.ovhcloud.example.com/v1",
            litellm_params={},
        )
        assert custom_url == "https://custom.ovhcloud.example.com/v1/responses", f"Expected custom endpoint, got {custom_url}"

        # Trailing slash on api_base.
        slash_url = cfg.get_complete_url(
            api_base="https://oai.endpoints.kepler.ai.cloud.ovh.net/v1/",
            litellm_params={},
        )
        assert slash_url == "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1/responses", "Should handle trailing slash"

    def test_validate_environment_with_api_key(self):
        """validate_environment must set a Bearer Authorization header."""
        cfg = OVHCloudResponsesAPIConfig()
        params = GenericLiteLLMParams(api_key="test-api-key-123")

        headers = cfg.validate_environment(
            headers={},
            model="ovhcloud/gpt-oss-120b",
            litellm_params=params,
        )

        assert headers.get("Authorization") == "Bearer test-api-key-123"

    def test_validate_environment_missing_api_key(self):
        """A missing API key must raise a descriptive ValueError."""
        cfg = OVHCloudResponsesAPIConfig()

        with pytest.raises(ValueError, match="OVHcloud AI Endpoints API key is required"):
            cfg.validate_environment(
                headers={},
                model="ovhcloud/gpt-oss-120b",
                litellm_params=None,
            )

    def test_supported_params_includes_openai_params(self):
        """OVHCloud follows the OpenAI spec, so standard params must be supported."""
        supported = OVHCloudResponsesAPIConfig().get_supported_openai_params(
            "ovhcloud/gpt-oss-120b"
        )

        assert "model" in supported, "model should be supported"
        assert "input" in supported, "input should be supported"
        assert "temperature" in supported, "temperature should be supported"
test_additional_drop_params_empty_list_keeps_all_params(self): assert result.get("prompt_cache_key") == "test_key" assert result.get("custom_param") == "value" + def test_openai_compatible_params_do_not_forward_vector_store_ids(self): + from litellm.utils import add_provider_specific_params_to_optional_params + + optional_params = {} + passed_params = { + "temperature": 0.3, + "vector_store_ids": ["vs_123"], + "vector_store_id": "vs_legacy", + "custom_param": "keep_me", + } + + result = add_provider_specific_params_to_optional_params( + optional_params=optional_params, + passed_params=passed_params, + custom_llm_provider="azure", + openai_params=["temperature", "model"], + additional_drop_params=None, + ) + + assert "extra_body" in result + assert result["extra_body"].get("custom_param") == "keep_me" + assert "vector_store_ids" not in result["extra_body"] + assert "vector_store_id" not in result["extra_body"] + + def test_get_optional_params_azure_gpt5_drops_vector_store_ids_from_extra_body(self): + from litellm.utils import get_optional_params + + optional_params = get_optional_params( + model="gpt-5.2", + custom_llm_provider="azure", + temperature=1, + vector_store_ids=["vs_123"], + ) + + assert "extra_body" in optional_params + assert "vector_store_ids" not in optional_params["extra_body"] + class TestDropParamsWithPromptCacheKey: """