Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions docs/my-website/blog/gpt_5_4/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
---
slug: gpt_5_4
title: "GPT-5.4 Model Support"
date: 2026-03-05T10:00:00
authors:
- name: Sameer Kankute
title: SWE @ LiteLLM (LLM Translation)
url: https://www.linkedin.com/in/sameer-kankute/
image_url: https://pbs.twimg.com/profile_images/2001352686994907136/ONgNuSk5_400x400.jpg
- name: Krrish Dholakia
title: "CEO, LiteLLM"
url: https://www.linkedin.com/in/krish-d/
image_url: https://pbs.twimg.com/profile_images/1298587542745358340/DZv3Oj-h_400x400.jpg
- name: Ishaan Jaff
title: "CTO, LiteLLM"
url: https://www.linkedin.com/in/reffajnaahsi/
image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg
description: "GPT-5.4 model support in LiteLLM"
tags: [openai, gpt-5.4, completion]
hide_table_of_contents: false
---

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

LiteLLM now fully supports GPT-5.4!

## Docker Image

```bash
docker pull ghcr.io/berriai/litellm:main-latest
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Docker image tag references GPT-4o instead of GPT-5.4. The tag v1.81.14-stable.gpt-4o explicitly names gpt-4o, which conflicts with this blog post documenting gpt-5.4 support. Update to a release tag that actually contains gpt-5.4 support, or use a generic/latest tag if the appropriate release doesn't exist yet.

```

Comment on lines +31 to +33
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Docker image tag references wrong model

The Docker image pulled and run in the usage example is tagged v1.81.14-stable.gpt-4o, which refers to GPT-4o, not GPT-5.4. This should either be a generic/latest tag or updated to a tag that actually contains GPT-5.4 support.

Suggested change
docker pull ghcr.io/berriai/litellm:v1.81.14-stable.gpt-4o
```
docker pull ghcr.io/berriai/litellm:main-latest

## Usage

<Tabs>
<TabItem value="proxy" label="LiteLLM Proxy">

**1. Setup config.yaml**

```yaml
model_list:
- model_name: gpt-5.4
litellm_params:
model: openai/gpt-5.4
api_key: os.environ/OPENAI_API_KEY
```

**2. Start the proxy**

```bash
docker run -d \
-p 4000:4000 \
-e OPENAI_API_KEY=$OPENAI_API_KEY \
-v $(pwd)/config.yaml:/app/config.yaml \
ghcr.io/berriai/litellm:main-latest \
--config /app/config.yaml
```

**3. Test it**

```bash
curl -X POST "http://0.0.0.0:4000/chat/completions" \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $LITELLM_KEY" \
-d '{
"model": "gpt-5.4",
"messages": [
{"role": "user", "content": "Write a Python function to check if a number is prime."}
]
}'
```

</TabItem>
<TabItem value="sdk" label="LiteLLM SDK">

```python
from litellm import completion

response = completion(
model="openai/gpt-5.4",
messages=[
{"role": "user", "content": "Write a Python function to check if a number is prime."}
],
)

print(response.choices[0].message.content)
```

</TabItem>
</Tabs>

## Notes

- Use `/responses` for better model performance.
- GPT-5.4 supports reasoning, function calling, vision, and tool-use — see the [OpenAI provider docs](../../docs/providers/openai) for advanced usage.
12 changes: 6 additions & 6 deletions litellm/llms/azure/chat/gpt_5_transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ def is_model_gpt_5_model(cls, model: str) -> bool:
def get_supported_openai_params(self, model: str) -> List[str]:
"""Get supported parameters for Azure OpenAI GPT-5 models.

Azure OpenAI GPT-5.2 models support logprobs, unlike OpenAI's GPT-5.
This overrides the parent class to add logprobs support back for gpt-5.2.
Azure OpenAI GPT-5.2/5.4 models support logprobs, unlike OpenAI's GPT-5.
This overrides the parent class to add logprobs support back for gpt-5.2+.

Reference:
- Tested with Azure OpenAI GPT-5.2 (api-version: 2025-01-01-preview)
Expand All @@ -43,9 +43,9 @@ def get_supported_openai_params(self, model: str) -> List[str]:
if "tool_choice" not in params:
params.append("tool_choice")

# Only gpt-5.2 has been verified to support logprobs on Azure.
# Only gpt-5.2+ has been verified to support logprobs on Azure.
# The base OpenAI class includes logprobs for gpt-5.1+, but Azure
# hasn't verified support for gpt-5.1, so remove them unless gpt-5.2.
# hasn't verified support for gpt-5.1, so remove them unless gpt-5.2/5.4+.
if self.is_model_gpt_5_1_model(model) and not self.is_model_gpt_5_2_model(model):
params = [p for p in params if p not in ["logprobs", "top_logprobs"]]
elif self.is_model_gpt_5_2_model(model):
Expand All @@ -67,7 +67,7 @@ def map_openai_params(
or optional_params.get("reasoning_effort")
)

# gpt-5.1 supports reasoning_effort='none', but other gpt-5 models don't
# gpt-5.1/5.2/5.4 support reasoning_effort='none', but other gpt-5 models don't
# See: https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/reasoning
is_gpt_5_1 = self.is_model_gpt_5_1_model(model)

Expand Down Expand Up @@ -101,7 +101,7 @@ def map_openai_params(
drop_params=drop_params,
)

# Only drop reasoning_effort='none' for non-gpt-5.1 models
# Only drop reasoning_effort='none' for non-gpt-5.1/5.2/5.4 models
if result.get("reasoning_effort") == "none" and not is_gpt_5_1:
result.pop("reasoning_effort")

Expand Down
23 changes: 14 additions & 9 deletions litellm/llms/openai/chat/gpt_5_transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,11 @@ def is_model_gpt_5_1_codex_max_model(cls, model: str) -> bool:

@classmethod
def is_model_gpt_5_1_model(cls, model: str) -> bool:
"""Check if the model is a gpt-5.1 or gpt-5.2 chat variant.
"""Check if the model is a gpt-5.1, gpt-5.2, or gpt-5.4 chat variant.

gpt-5.1/5.2 support temperature when reasoning_effort="none",
gpt-5.1/5.2/5.4 support temperature when reasoning_effort="none",
unlike base gpt-5 which only supports temperature=1. Excludes
pro variants which keep stricter knobs and gpt-5.2-chat variants
pro variants which keep stricter knobs and chat-only variants
which only support temperature=1.
"""
model_name = model.split("/")[-1]
Expand All @@ -62,7 +62,12 @@ def is_model_gpt_5_1_model(cls, model: str) -> bool:
and "pro" not in model_name
and not model_name.startswith("gpt-5.2-chat")
)
return is_gpt_5_1 or is_gpt_5_2
is_gpt_5_4 = (
model_name.startswith("gpt-5.4")
and "pro" not in model_name
and not model_name.startswith("gpt-5.4-chat")
)
return is_gpt_5_1 or is_gpt_5_2 or is_gpt_5_4
Comment on lines +65 to +70
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hardcoded model name violates no-hardcoding rule

The custom rule for this repository explicitly prohibits hardcoding model-specific flags directly in transformation code. Instead, capabilities should be read from model_prices_and_context_window.json via helper functions like get_model_info or supports_reasoning().

gpt-5.4 is already added to the JSON with "supports_reasoning": true, "supports_tool_choice": true, etc. The code should leverage those fields rather than adding another startswith("gpt-5.4") guard. This means every future model revision (e.g. gpt-5.5, gpt-5.4-turbo) will again require a code change instead of a JSON update.

The same hardcoded pattern also exists in is_model_gpt_5_2_model (line 82) and propagates to the Azure config at litellm/llms/azure/chat/gpt_5_transformation.py:49,51.

Context Used: Rule from dashboard - What: Do not hardcode model-specific flags in the codebase. Instead, put them in model_prices_and_co... (source)

Comment on lines +65 to +70
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hardcoded model-specific flag violates the no-hardcoding rule. Custom rule [2605a1b1] explicitly requires putting model capabilities in model_prices_and_context_window.json and reading them via helper functions like supports_reasoning(), not via startswith() checks in the transformation code. This pattern propagates further to is_model_gpt_5_2_model() (line 82) and the Azure config. Future model revisions (e.g., gpt-5.5, gpt-5.4-turbo) will again require code changes instead of just updating the JSON.

If gpt-5.4 needs the same parameter handling as gpt-5.2 and gpt-5.1, add the necessary capability fields to its JSON entry and use a helper function to check them.

Context Used: Rule from dashboard - What: Do not hardcode model-specific flags in the codebase. Instead, put them in model_prices_and_co... (source)


@classmethod
def is_model_gpt_5_2_pro_model(cls, model: str) -> bool:
Expand All @@ -74,7 +79,7 @@ def is_model_gpt_5_2_pro_model(cls, model: str) -> bool:
def is_model_gpt_5_2_model(cls, model: str) -> bool:
"""Check if the model is a gpt-5.2 variant (including pro)."""
model_name = model.split("/")[-1]
return model_name.startswith("gpt-5.2")
return model_name.startswith("gpt-5.2") or model_name.startswith("gpt-5.4")
Comment on lines 79 to +82
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is_model_gpt_5_2_model now incorrectly returns True for gpt-5.4

The method is named is_model_gpt_5_2_model but now returns True for gpt-5.4 models too:

return model_name.startswith("gpt-5.2") or model_name.startswith("gpt-5.4")

This is semantically wrong and will be a source of confusion for future contributors who call is_model_gpt_5_2_model(model) and expect it to only match gpt-5.2 family models. All downstream callers in azure/chat/gpt_5_transformation.py (lines 49 and 51) now silently treat gpt-5.4 as gpt-5.2 without any indication that this is intentional.

If gpt-5.4 needs the same logprobs/xhigh treatment as gpt-5.2, a more maintainable approach would be to introduce a separate is_model_gpt_5_4_model method and update the call-sites explicitly, or better, to consolidate these checks into a single capability-based helper that reads from model info.

Comment on lines 79 to +82
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Method name is_model_gpt_5_2_model() is semantically incorrect — it now returns True for both gpt-5.2 and gpt-5.4 models:

return model_name.startswith("gpt-5.2") or model_name.startswith("gpt-5.4")

This misleads future contributors who expect the method to only match gpt-5.2 variants. Downstream callers in azure/chat/gpt_5_transformation.py (lines 49, 51) silently apply gpt-5.2 parameter handling (logprobs support) to gpt-5.4 without explicit indication.

Consider either introducing a separate is_model_gpt_5_4_model() method with explicit call-site updates, or consolidating into a single capability-based helper that reads from model info.


def get_supported_openai_params(self, model: str) -> list:
if self.is_model_gpt_5_search_model(model):
Expand Down Expand Up @@ -113,7 +118,7 @@ def get_supported_openai_params(self, model: str) -> list:
"web_search_options",
]

# gpt-5.1/5.2 support logprobs, top_p, top_logprobs when reasoning_effort="none"
# gpt-5.1/5.2/5.4 support logprobs, top_p, top_logprobs when reasoning_effort="none"
if not self.is_model_gpt_5_1_model(model):
non_supported_params.extend(["logprobs", "top_p", "top_logprobs"])

Expand Down Expand Up @@ -156,7 +161,7 @@ def map_openai_params(
else:
raise litellm.utils.UnsupportedParamsError(
message=(
"reasoning_effort='xhigh' is only supported for gpt-5.1-codex-max and gpt-5.2 models."
"reasoning_effort='xhigh' is only supported for gpt-5.1-codex-max, gpt-5.2, and gpt-5.4+ models."
),
status_code=400,
)
Expand All @@ -170,7 +175,7 @@ def map_openai_params(
"max_tokens"
)

# gpt-5.1/5.2 support logprobs, top_p, top_logprobs only when reasoning_effort="none"
# gpt-5.1/5.2/5.4 support logprobs, top_p, top_logprobs only when reasoning_effort="none"
if self.is_model_gpt_5_1_model(model):
sampling_params = ["logprobs", "top_logprobs", "top_p"]
has_sampling = any(p in non_default_params for p in sampling_params)
Expand All @@ -181,7 +186,7 @@ def map_openai_params(
else:
raise litellm.utils.UnsupportedParamsError(
message=(
"gpt-5.1/5.2 only support logprobs, top_p, top_logprobs when "
"gpt-5.1/5.2/5.4 only support logprobs, top_p, top_logprobs when "
"reasoning_effort='none'. Current reasoning_effort='{}'. "
"To drop unsupported params set `litellm.drop_params = True`"
).format(reasoning_effort),
Expand Down
72 changes: 72 additions & 0 deletions litellm/model_prices_and_context_window_backup.json
Original file line number Diff line number Diff line change
Expand Up @@ -20810,6 +20810,78 @@
"supports_vision": true,
"supports_web_search": true
},
"gpt-5.4": {
"cache_read_input_token_cost": 2.5e-07,
"cache_read_input_token_cost_priority": 5e-07,
"input_cost_per_token": 2.5e-06,
"input_cost_per_token_priority": 5e-06,
"litellm_provider": "openai",
"max_input_tokens": 1050000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "responses",
"output_cost_per_token": 1.5e-05,
"output_cost_per_token_priority": 2.25e-05,
"supported_endpoints": [
"/v1/chat/completions",
"/v1/batch",
"/v1/responses"
],
"supported_modalities": [
"text",
"image"
],
"supported_output_modalities": [
"text"
],
"supports_function_calling": true,
"supports_native_streaming": true,
"supports_parallel_function_calling": true,
"supports_pdf_input": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_system_messages": true,
"supports_tool_choice": true,
"supports_service_tier": true,
"supports_vision": true
},
"gpt-5.4-2026-03-05": {
"cache_read_input_token_cost": 2.5e-07,
"cache_read_input_token_cost_priority": 5e-07,
"input_cost_per_token": 2.5e-06,
"input_cost_per_token_priority": 5e-06,
"litellm_provider": "openai",
"max_input_tokens": 1050000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "responses",
"output_cost_per_token": 1.5e-05,
"output_cost_per_token_priority": 2.25e-05,
"supported_endpoints": [
"/v1/chat/completions",
"/v1/batch",
"/v1/responses"
],
"supported_modalities": [
"text",
"image"
],
"supported_output_modalities": [
"text"
],
"supports_function_calling": true,
"supports_native_streaming": true,
"supports_parallel_function_calling": true,
"supports_pdf_input": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_system_messages": true,
"supports_tool_choice": true,
"supports_service_tier": true,
"supports_vision": true
},
"gpt-5-pro": {
"input_cost_per_token": 1.5e-05,
"input_cost_per_token_batches": 7.5e-06,
Expand Down
72 changes: 72 additions & 0 deletions model_prices_and_context_window.json
Original file line number Diff line number Diff line change
Expand Up @@ -20810,6 +20810,78 @@
"supports_vision": true,
"supports_web_search": true
},
"gpt-5.4": {
"cache_read_input_token_cost": 2.5e-07,
"cache_read_input_token_cost_priority": 5e-07,
"input_cost_per_token": 2.5e-06,
"input_cost_per_token_priority": 5e-06,
"litellm_provider": "openai",
"max_input_tokens": 1050000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "responses",
"output_cost_per_token": 1.5e-05,
"output_cost_per_token_priority": 2.25e-05,
"supported_endpoints": [
"/v1/chat/completions",
"/v1/batch",
"/v1/responses"
],
"supported_modalities": [
"text",
"image"
],
"supported_output_modalities": [
"text"
],
"supports_function_calling": true,
"supports_native_streaming": true,
"supports_parallel_function_calling": true,
"supports_pdf_input": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_system_messages": true,
"supports_tool_choice": true,
"supports_service_tier": true,
"supports_vision": true
},
"gpt-5.4-2026-03-05": {
"cache_read_input_token_cost": 2.5e-07,
"cache_read_input_token_cost_priority": 5e-07,
"input_cost_per_token": 2.5e-06,
"input_cost_per_token_priority": 5e-06,
"litellm_provider": "openai",
"max_input_tokens": 1050000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "responses",
"output_cost_per_token": 1.5e-05,
"output_cost_per_token_priority": 2.25e-05,
"supported_endpoints": [
"/v1/chat/completions",
"/v1/batch",
"/v1/responses"
],
"supported_modalities": [
"text",
"image"
],
"supported_output_modalities": [
"text"
],
"supports_function_calling": true,
"supports_native_streaming": true,
"supports_parallel_function_calling": true,
"supports_pdf_input": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_system_messages": true,
"supports_tool_choice": true,
"supports_service_tier": true,
"supports_vision": true
},
"gpt-5-pro": {
"input_cost_per_token": 1.5e-05,
"input_cost_per_token_batches": 7.5e-06,
Expand Down
Loading