From b9a8d42882ef014b7479ccc7926c7002b8336b4e Mon Sep 17 00:00:00 2001 From: Sameer Kankute Date: Thu, 5 Mar 2026 23:26:24 +0530 Subject: [PATCH 1/2] Add day 0 support for gpt-5.4 --- docs/my-website/blog/gpt_5_4/index.md | 96 +++++++++++++++++++ .../llms/azure/chat/gpt_5_transformation.py | 12 +-- .../llms/openai/chat/gpt_5_transformation.py | 23 +++-- ...odel_prices_and_context_window_backup.json | 37 +++++++ model_prices_and_context_window.json | 37 +++++++ 5 files changed, 190 insertions(+), 15 deletions(-) create mode 100644 docs/my-website/blog/gpt_5_4/index.md diff --git a/docs/my-website/blog/gpt_5_4/index.md b/docs/my-website/blog/gpt_5_4/index.md new file mode 100644 index 0000000000..b4e68096c0 --- /dev/null +++ b/docs/my-website/blog/gpt_5_4/index.md @@ -0,0 +1,96 @@ +--- +slug: gpt_5_4 +title: "GPT-5.4 Model Support" +date: 2026-03-05T10:00:00 +authors: + - name: Sameer Kankute + title: SWE @ LiteLLM (LLM Translation) + url: https://www.linkedin.com/in/sameer-kankute/ + image_url: https://pbs.twimg.com/profile_images/2001352686994907136/ONgNuSk5_400x400.jpg + - name: Krrish Dholakia + title: "CEO, LiteLLM" + url: https://www.linkedin.com/in/krish-d/ + image_url: https://pbs.twimg.com/profile_images/1298587542745358340/DZv3Oj-h_400x400.jpg + - name: Ishaan Jaff + title: "CTO, LiteLLM" + url: https://www.linkedin.com/in/reffajnaahsi/ + image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg +description: "GPT-5.4 model support in LiteLLM" +tags: [openai, gpt-5.4, completion] +hide_table_of_contents: false +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +LiteLLM now fully supports GPT-5.4! + +## Docker Image + +```bash +docker pull ghcr.io/berriai/litellm:v1.81.14-stable.gpt-5.4 +``` + +## Usage + + + + +**1. Setup config.yaml** + +```yaml +model_list: + - model_name: gpt-5.4 + litellm_params: + model: openai/gpt-5.4 + api_key: os.environ/OPENAI_API_KEY +``` + +**2. 
Start the proxy** + +```bash +docker run -d \ + -p 4000:4000 \ + -e OPENAI_API_KEY=$OPENAI_API_KEY \ + -v $(pwd)/config.yaml:/app/config.yaml \ + ghcr.io/berriai/litellm:v1.81.14-stable.gpt-5.4 \ + --config /app/config.yaml +``` + +**3. Test it** + +```bash +curl -X POST "http://0.0.0.0:4000/chat/completions" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $LITELLM_KEY" \ + -d '{ + "model": "gpt-5.4", + "messages": [ + {"role": "user", "content": "Write a Python function to check if a number is prime."} + ] + }' +``` + + + + +```python +from litellm import completion + +response = completion( + model="openai/gpt-5.4", + messages=[ + {"role": "user", "content": "Write a Python function to check if a number is prime."} + ], +) + +print(response.choices[0].message.content) +``` + + + + +## Notes + +- Use the `/responses` endpoint for better model performance. +- GPT-5.4 supports reasoning, function calling, vision, and tool-use — see the [OpenAI provider docs](../../docs/providers/openai) for advanced usage. diff --git a/litellm/llms/azure/chat/gpt_5_transformation.py b/litellm/llms/azure/chat/gpt_5_transformation.py index 2a2955fca3..a70e008d66 100644 --- a/litellm/llms/azure/chat/gpt_5_transformation.py +++ b/litellm/llms/azure/chat/gpt_5_transformation.py @@ -28,8 +28,8 @@ def is_model_gpt_5_model(cls, model: str) -> bool: def get_supported_openai_params(self, model: str) -> List[str]: """Get supported parameters for Azure OpenAI GPT-5 models. - Azure OpenAI GPT-5.2 models support logprobs, unlike OpenAI's GPT-5. - This overrides the parent class to add logprobs support back for gpt-5.2. + Azure OpenAI GPT-5.2/5.4 models support logprobs, unlike OpenAI's GPT-5. + This overrides the parent class to add logprobs support back for gpt-5.2+. 
Reference: - Tested with Azure OpenAI GPT-5.2 (api-version: 2025-01-01-preview) @@ -43,9 +43,9 @@ def get_supported_openai_params(self, model: str) -> List[str]: if "tool_choice" not in params: params.append("tool_choice") - # Only gpt-5.2 has been verified to support logprobs on Azure. + # Only gpt-5.2+ has been verified to support logprobs on Azure. # The base OpenAI class includes logprobs for gpt-5.1+, but Azure - # hasn't verified support for gpt-5.1, so remove them unless gpt-5.2. + # hasn't verified support for gpt-5.1, so remove them unless gpt-5.2/5.4+. if self.is_model_gpt_5_1_model(model) and not self.is_model_gpt_5_2_model(model): params = [p for p in params if p not in ["logprobs", "top_logprobs"]] elif self.is_model_gpt_5_2_model(model): @@ -67,7 +67,7 @@ def map_openai_params( or optional_params.get("reasoning_effort") ) - # gpt-5.1 supports reasoning_effort='none', but other gpt-5 models don't + # gpt-5.1/5.2/5.4 support reasoning_effort='none', but other gpt-5 models don't # See: https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/reasoning is_gpt_5_1 = self.is_model_gpt_5_1_model(model) @@ -101,7 +101,7 @@ def map_openai_params( drop_params=drop_params, ) - # Only drop reasoning_effort='none' for non-gpt-5.1 models + # Only drop reasoning_effort='none' for non-gpt-5.1/5.2/5.4 models if result.get("reasoning_effort") == "none" and not is_gpt_5_1: result.pop("reasoning_effort") diff --git a/litellm/llms/openai/chat/gpt_5_transformation.py b/litellm/llms/openai/chat/gpt_5_transformation.py index 014e80f0a3..5f0c58e78a 100644 --- a/litellm/llms/openai/chat/gpt_5_transformation.py +++ b/litellm/llms/openai/chat/gpt_5_transformation.py @@ -48,11 +48,11 @@ def is_model_gpt_5_1_codex_max_model(cls, model: str) -> bool: @classmethod def is_model_gpt_5_1_model(cls, model: str) -> bool: - """Check if the model is a gpt-5.1 or gpt-5.2 chat variant. + """Check if the model is a gpt-5.1, gpt-5.2, or gpt-5.4 chat variant. 
- gpt-5.1/5.2 support temperature when reasoning_effort="none", + gpt-5.1/5.2/5.4 support temperature when reasoning_effort="none", unlike base gpt-5 which only supports temperature=1. Excludes - pro variants which keep stricter knobs and gpt-5.2-chat variants + pro variants which keep stricter knobs and chat-only variants which only support temperature=1. """ model_name = model.split("/")[-1] @@ -62,7 +62,12 @@ def is_model_gpt_5_1_model(cls, model: str) -> bool: and "pro" not in model_name and not model_name.startswith("gpt-5.2-chat") ) - return is_gpt_5_1 or is_gpt_5_2 + is_gpt_5_4 = ( + model_name.startswith("gpt-5.4") + and "pro" not in model_name + and not model_name.startswith("gpt-5.4-chat") + ) + return is_gpt_5_1 or is_gpt_5_2 or is_gpt_5_4 @classmethod def is_model_gpt_5_2_pro_model(cls, model: str) -> bool: @@ -74,7 +79,7 @@ def is_model_gpt_5_2_pro_model(cls, model: str) -> bool: def is_model_gpt_5_2_model(cls, model: str) -> bool: """Check if the model is a gpt-5.2 variant (including pro).""" model_name = model.split("/")[-1] - return model_name.startswith("gpt-5.2") + return model_name.startswith("gpt-5.2") or model_name.startswith("gpt-5.4") def get_supported_openai_params(self, model: str) -> list: if self.is_model_gpt_5_search_model(model): @@ -113,7 +118,7 @@ def get_supported_openai_params(self, model: str) -> list: "web_search_options", ] - # gpt-5.1/5.2 support logprobs, top_p, top_logprobs when reasoning_effort="none" + # gpt-5.1/5.2/5.4 support logprobs, top_p, top_logprobs when reasoning_effort="none" if not self.is_model_gpt_5_1_model(model): non_supported_params.extend(["logprobs", "top_p", "top_logprobs"]) @@ -156,7 +161,7 @@ def map_openai_params( else: raise litellm.utils.UnsupportedParamsError( message=( - "reasoning_effort='xhigh' is only supported for gpt-5.1-codex-max and gpt-5.2 models." + "reasoning_effort='xhigh' is only supported for gpt-5.1-codex-max, gpt-5.2, and gpt-5.4+ models." 
), status_code=400, ) @@ -170,7 +175,7 @@ def map_openai_params( "max_tokens" ) - # gpt-5.1/5.2 support logprobs, top_p, top_logprobs only when reasoning_effort="none" + # gpt-5.1/5.2/5.4 support logprobs, top_p, top_logprobs only when reasoning_effort="none" if self.is_model_gpt_5_1_model(model): sampling_params = ["logprobs", "top_logprobs", "top_p"] has_sampling = any(p in non_default_params for p in sampling_params) @@ -181,7 +186,7 @@ def map_openai_params( else: raise litellm.utils.UnsupportedParamsError( message=( - "gpt-5.1/5.2 only support logprobs, top_p, top_logprobs when " + "gpt-5.1/5.2/5.4 only support logprobs, top_p, top_logprobs when " "reasoning_effort='none'. Current reasoning_effort='{}'. " "To drop unsupported params set `litellm.drop_params = True`" ).format(reasoning_effort), diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 1e0a789942..71498abad5 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -20810,6 +20810,43 @@ "supports_vision": true, "supports_web_search": true }, + "gpt-5.4": { + "cache_read_input_token_cost": 2.5e-07, + "cache_read_input_token_cost_priority": 5e-07, + "input_cost_per_token": 2.5e-06, + "input_cost_per_token_priority": 5e-06, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1.5e-05, + "output_cost_per_token_priority": 3e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + 
"supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, "gpt-5-pro": { "input_cost_per_token": 1.5e-05, "input_cost_per_token_batches": 7.5e-06, diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 1e0a789942..71498abad5 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -20810,6 +20810,43 @@ "supports_vision": true, "supports_web_search": true }, + "gpt-5.4": { + "cache_read_input_token_cost": 2.5e-07, + "cache_read_input_token_cost_priority": 5e-07, + "input_cost_per_token": 2.5e-06, + "input_cost_per_token_priority": 5e-06, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1.5e-05, + "output_cost_per_token_priority": 3e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, "gpt-5-pro": { "input_cost_per_token": 1.5e-05, "input_cost_per_token_batches": 7.5e-06, From 3b457b5d8ee1191a9c972cda189470d6c184162b Mon Sep 17 00:00:00 2001 From: Sameer Kankute Date: Thu, 5 Mar 2026 23:40:24 +0530 Subject: [PATCH 2/2] Add day 0 support for gpt-5.4 --- ...odel_prices_and_context_window_backup.json | 39 ++++++++++++++++++- model_prices_and_context_window.json | 39 ++++++++++++++++++- 2 files changed, 74 insertions(+), 4 deletions(-) diff 
--git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 71498abad5..61a70fc0d5 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -20816,12 +20816,12 @@ "input_cost_per_token": 2.5e-06, "input_cost_per_token_priority": 5e-06, "litellm_provider": "openai", - "max_input_tokens": 272000, + "max_input_tokens": 1050000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", "output_cost_per_token": 1.5e-05, - "output_cost_per_token_priority": 3e-05, + "output_cost_per_token_priority": 2.25e-05, "supported_endpoints": [ "/v1/chat/completions", "/v1/batch", @@ -20832,9 +20832,44 @@ "image" ], "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-5.4-2026-03-05": { + "cache_read_input_token_cost": 2.5e-07, + "cache_read_input_token_cost_priority": 5e-07, + "input_cost_per_token": 2.5e-06, + "input_cost_per_token_priority": 5e-06, + "litellm_provider": "openai", + "max_input_tokens": 1050000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1.5e-05, + "output_cost_per_token_priority": 2.25e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ "text", "image" ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 
71498abad5..61a70fc0d5 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -20816,12 +20816,12 @@ "input_cost_per_token": 2.5e-06, "input_cost_per_token_priority": 5e-06, "litellm_provider": "openai", - "max_input_tokens": 272000, + "max_input_tokens": 1050000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", "output_cost_per_token": 1.5e-05, - "output_cost_per_token_priority": 3e-05, + "output_cost_per_token_priority": 2.25e-05, "supported_endpoints": [ "/v1/chat/completions", "/v1/batch", @@ -20832,9 +20832,44 @@ "image" ], "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-5.4-2026-03-05": { + "cache_read_input_token_cost": 2.5e-07, + "cache_read_input_token_cost_priority": 5e-07, + "input_cost_per_token": 2.5e-06, + "input_cost_per_token_priority": 5e-06, + "litellm_provider": "openai", + "max_input_tokens": 1050000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1.5e-05, + "output_cost_per_token_priority": 2.25e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ "text", "image" ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true,