Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions docs/my-website/blog/gpt_5_4/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
---
slug: gpt_5_4
title: "GPT-5.4 Model Support"
date: 2026-03-05T10:00:00
authors:
- name: Sameer Kankute
title: SWE @ LiteLLM (LLM Translation)
url: https://www.linkedin.com/in/sameer-kankute/
image_url: https://pbs.twimg.com/profile_images/2001352686994907136/ONgNuSk5_400x400.jpg
- name: Krrish Dholakia
title: "CEO, LiteLLM"
url: https://www.linkedin.com/in/krish-d/
image_url: https://pbs.twimg.com/profile_images/1298587542745358340/DZv3Oj-h_400x400.jpg
- name: Ishaan Jaff
title: "CTO, LiteLLM"
url: https://www.linkedin.com/in/reffajnaahsi/
image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg
description: "GPT-5.4 model support in LiteLLM"
tags: [openai, gpt-5.4, completion]
hide_table_of_contents: false
---

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

LiteLLM now fully supports GPT-5.4!

## Docker Image

```bash
docker pull ghcr.io/berriai/litellm:main-latest
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Docker image tag references GPT-4o instead of GPT-5.4. The tag v1.81.14-stable.gpt-4o explicitly names gpt-4o, which conflicts with this blog post documenting gpt-5.4 support. Update to a release tag that actually contains gpt-5.4 support, or use a generic/latest tag if the appropriate release doesn't exist yet.

```

Comment on lines +31 to +33
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Docker image tag references wrong model

The Docker image pulled and run in the usage example is tagged v1.81.14-stable.gpt-4o, which refers to GPT-4o, not GPT-5.4. This should either be a generic/latest tag or updated to a tag that actually contains GPT-5.4 support.

Suggested change
docker pull ghcr.io/berriai/litellm:v1.81.14-stable.gpt-4o
```
docker pull ghcr.io/berriai/litellm:main-latest

## Usage

<Tabs>
<TabItem value="proxy" label="LiteLLM Proxy">

**1. Setup config.yaml**

```yaml
model_list:
- model_name: gpt-5.4
litellm_params:
model: openai/gpt-5.4
api_key: os.environ/OPENAI_API_KEY
```

**2. Start the proxy**

```bash
docker run -d \
-p 4000:4000 \
-e OPENAI_API_KEY=$OPENAI_API_KEY \
-v $(pwd)/config.yaml:/app/config.yaml \
ghcr.io/berriai/litellm:main-latest \
--config /app/config.yaml
```

**3. Test it**

```bash
curl -X POST "http://0.0.0.0:4000/chat/completions" \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $LITELLM_KEY" \
-d '{
"model": "gpt-5.4",
"messages": [
{"role": "user", "content": "Write a Python function to check if a number is prime."}
]
}'
```

</TabItem>
<TabItem value="sdk" label="LiteLLM SDK">

```python
from litellm import completion

response = completion(
model="openai/gpt-5.4",
messages=[
{"role": "user", "content": "Write a Python function to check if a number is prime."}
],
)

print(response.choices[0].message.content)
```

</TabItem>
</Tabs>

## Notes

- Use `/responses` for better model performance.
- GPT-5.4 supports reasoning, function calling, vision, and tool-use — see the [OpenAI provider docs](../../docs/providers/openai) for advanced usage.
12 changes: 6 additions & 6 deletions litellm/llms/azure/chat/gpt_5_transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ def is_model_gpt_5_model(cls, model: str) -> bool:
def get_supported_openai_params(self, model: str) -> List[str]:
"""Get supported parameters for Azure OpenAI GPT-5 models.

Azure OpenAI GPT-5.2 models support logprobs, unlike OpenAI's GPT-5.
This overrides the parent class to add logprobs support back for gpt-5.2.
Azure OpenAI GPT-5.2/5.4 models support logprobs, unlike OpenAI's GPT-5.
This overrides the parent class to add logprobs support back for gpt-5.2+.

Reference:
- Tested with Azure OpenAI GPT-5.2 (api-version: 2025-01-01-preview)
Expand All @@ -43,9 +43,9 @@ def get_supported_openai_params(self, model: str) -> List[str]:
if "tool_choice" not in params:
params.append("tool_choice")

# Only gpt-5.2 has been verified to support logprobs on Azure.
# Only gpt-5.2+ has been verified to support logprobs on Azure.
# The base OpenAI class includes logprobs for gpt-5.1+, but Azure
# hasn't verified support for gpt-5.1, so remove them unless gpt-5.2.
# hasn't verified support for gpt-5.1, so remove them unless gpt-5.2/5.4+.
if self.is_model_gpt_5_1_model(model) and not self.is_model_gpt_5_2_model(model):
params = [p for p in params if p not in ["logprobs", "top_logprobs"]]
elif self.is_model_gpt_5_2_model(model):
Expand All @@ -67,7 +67,7 @@ def map_openai_params(
or optional_params.get("reasoning_effort")
)

# gpt-5.1 supports reasoning_effort='none', but other gpt-5 models don't
# gpt-5.1/5.2/5.4 support reasoning_effort='none', but other gpt-5 models don't
# See: https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/reasoning
is_gpt_5_1 = self.is_model_gpt_5_1_model(model)

Expand Down Expand Up @@ -101,7 +101,7 @@ def map_openai_params(
drop_params=drop_params,
)

# Only drop reasoning_effort='none' for non-gpt-5.1 models
# Only drop reasoning_effort='none' for non-gpt-5.1/5.2/5.4 models
if result.get("reasoning_effort") == "none" and not is_gpt_5_1:
result.pop("reasoning_effort")

Expand Down
23 changes: 14 additions & 9 deletions litellm/llms/openai/chat/gpt_5_transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,11 @@ def is_model_gpt_5_1_codex_max_model(cls, model: str) -> bool:

@classmethod
def is_model_gpt_5_1_model(cls, model: str) -> bool:
"""Check if the model is a gpt-5.1 or gpt-5.2 chat variant.
"""Check if the model is a gpt-5.1, gpt-5.2, or gpt-5.4 chat variant.

gpt-5.1/5.2 support temperature when reasoning_effort="none",
gpt-5.1/5.2/5.4 support temperature when reasoning_effort="none",
unlike base gpt-5 which only supports temperature=1. Excludes
pro variants which keep stricter knobs and gpt-5.2-chat variants
pro variants which keep stricter knobs and chat-only variants
which only support temperature=1.
"""
model_name = model.split("/")[-1]
Expand All @@ -62,7 +62,12 @@ def is_model_gpt_5_1_model(cls, model: str) -> bool:
and "pro" not in model_name
and not model_name.startswith("gpt-5.2-chat")
)
return is_gpt_5_1 or is_gpt_5_2
is_gpt_5_4 = (
model_name.startswith("gpt-5.4")
and "pro" not in model_name
and not model_name.startswith("gpt-5.4-chat")
)
return is_gpt_5_1 or is_gpt_5_2 or is_gpt_5_4
Comment on lines +65 to +70
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hardcoded model name violates no-hardcoding rule

The custom rule for this repository explicitly prohibits hardcoding model-specific flags directly in transformation code. Instead, capabilities should be read from model_prices_and_context_window.json via helper functions like get_model_info or supports_reasoning().

gpt-5.4 is already added to the JSON with "supports_reasoning": true, "supports_tool_choice": true, etc. The code should leverage those fields rather than adding another startswith("gpt-5.4") guard. This means every future model revision (e.g. gpt-5.5, gpt-5.4-turbo) will again require a code change instead of a JSON update.

The same hardcoded pattern also exists in is_model_gpt_5_2_model (line 82) and propagates to the Azure config at litellm/llms/azure/chat/gpt_5_transformation.py:49,51.

Context Used: Rule from dashboard - What: Do not hardcode model-specific flags in the codebase. Instead, put them in model_prices_and_co... (source)

Comment on lines +65 to +70
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hardcoded model-specific flag violates the no-hardcoding rule. Custom rule [2605a1b1] explicitly requires putting model capabilities in model_prices_and_context_window.json and reading them via helper functions like supports_reasoning(), not via startswith() checks in the transformation code. This pattern propagates further to is_model_gpt_5_2_model() (line 82) and the Azure config. Future model revisions (e.g., gpt-5.5, gpt-5.4-turbo) will again require code changes instead of just updating the JSON.

If gpt-5.4 needs the same parameter handling as gpt-5.2 and gpt-5.1, add the necessary capability fields to its JSON entry and use a helper function to check them.

Context Used: Rule from dashboard - What: Do not hardcode model-specific flags in the codebase. Instead, put them in model_prices_and_co... (source)


@classmethod
def is_model_gpt_5_2_pro_model(cls, model: str) -> bool:
Expand All @@ -74,7 +79,7 @@ def is_model_gpt_5_2_pro_model(cls, model: str) -> bool:
def is_model_gpt_5_2_model(cls, model: str) -> bool:
"""Check if the model is a gpt-5.2 variant (including pro)."""
model_name = model.split("/")[-1]
return model_name.startswith("gpt-5.2")
return model_name.startswith("gpt-5.2") or model_name.startswith("gpt-5.4")
Comment on lines 79 to +82
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is_model_gpt_5_2_model now incorrectly returns True for gpt-5.4

The method is named is_model_gpt_5_2_model but now returns True for gpt-5.4 models too:

return model_name.startswith("gpt-5.2") or model_name.startswith("gpt-5.4")

This is semantically wrong and will be a source of confusion for future contributors who call is_model_gpt_5_2_model(model) and expect it to only match gpt-5.2 family models. All downstream callers in azure/chat/gpt_5_transformation.py (lines 49 and 51) now silently treat gpt-5.4 as gpt-5.2 without any indication that this is intentional.

If gpt-5.4 needs the same logprobs/xhigh treatment as gpt-5.2, a more maintainable approach would be to introduce a separate is_model_gpt_5_4_model method and update the call-sites explicitly, or better, to consolidate these checks into a single capability-based helper that reads from model info.

Comment on lines 79 to +82
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Method name is_model_gpt_5_2_model() is semantically incorrect — it now returns True for both gpt-5.2 and gpt-5.4 models:

return model_name.startswith("gpt-5.2") or model_name.startswith("gpt-5.4")

This misleads future contributors who expect the method to only match gpt-5.2 variants. Downstream callers in azure/chat/gpt_5_transformation.py (lines 49, 51) silently apply gpt-5.2 parameter handling (logprobs support) to gpt-5.4 without explicit indication.

Consider either introducing a separate is_model_gpt_5_4_model() method with explicit call-site updates, or consolidating into a single capability-based helper that reads from model info.


def get_supported_openai_params(self, model: str) -> list:
if self.is_model_gpt_5_search_model(model):
Expand Down Expand Up @@ -113,7 +118,7 @@ def get_supported_openai_params(self, model: str) -> list:
"web_search_options",
]

# gpt-5.1/5.2 support logprobs, top_p, top_logprobs when reasoning_effort="none"
# gpt-5.1/5.2/5.4 support logprobs, top_p, top_logprobs when reasoning_effort="none"
if not self.is_model_gpt_5_1_model(model):
non_supported_params.extend(["logprobs", "top_p", "top_logprobs"])

Expand Down Expand Up @@ -156,7 +161,7 @@ def map_openai_params(
else:
raise litellm.utils.UnsupportedParamsError(
message=(
"reasoning_effort='xhigh' is only supported for gpt-5.1-codex-max and gpt-5.2 models."
"reasoning_effort='xhigh' is only supported for gpt-5.1-codex-max, gpt-5.2, and gpt-5.4+ models."
),
status_code=400,
)
Expand All @@ -170,7 +175,7 @@ def map_openai_params(
"max_tokens"
)

# gpt-5.1/5.2 support logprobs, top_p, top_logprobs only when reasoning_effort="none"
# gpt-5.1/5.2/5.4 support logprobs, top_p, top_logprobs only when reasoning_effort="none"
if self.is_model_gpt_5_1_model(model):
sampling_params = ["logprobs", "top_logprobs", "top_p"]
has_sampling = any(p in non_default_params for p in sampling_params)
Expand All @@ -181,7 +186,7 @@ def map_openai_params(
else:
raise litellm.utils.UnsupportedParamsError(
message=(
"gpt-5.1/5.2 only support logprobs, top_p, top_logprobs when "
"gpt-5.1/5.2/5.4 only support logprobs, top_p, top_logprobs when "
"reasoning_effort='none'. Current reasoning_effort='{}'. "
"To drop unsupported params set `litellm.drop_params = True`"
).format(reasoning_effort),
Expand Down
72 changes: 72 additions & 0 deletions litellm/model_prices_and_context_window_backup.json
Original file line number Diff line number Diff line change
Expand Up @@ -20810,6 +20810,78 @@
"supports_vision": true,
"supports_web_search": true
},
"gpt-5.4": {
"cache_read_input_token_cost": 2.5e-07,
"cache_read_input_token_cost_priority": 5e-07,
"input_cost_per_token": 2.5e-06,
"input_cost_per_token_priority": 5e-06,
"litellm_provider": "openai",
"max_input_tokens": 1050000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "responses",
"output_cost_per_token": 1.5e-05,
"output_cost_per_token_priority": 2.25e-05,
"supported_endpoints": [
"/v1/chat/completions",
"/v1/batch",
"/v1/responses"
],
"supported_modalities": [
"text",
"image"
],
"supported_output_modalities": [
"text"
],
"supports_function_calling": true,
"supports_native_streaming": true,
"supports_parallel_function_calling": true,
"supports_pdf_input": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_system_messages": true,
"supports_tool_choice": true,
"supports_service_tier": true,
"supports_vision": true
},
"gpt-5.4-2026-03-05": {
"cache_read_input_token_cost": 2.5e-07,
"cache_read_input_token_cost_priority": 5e-07,
"input_cost_per_token": 2.5e-06,
"input_cost_per_token_priority": 5e-06,
"litellm_provider": "openai",
"max_input_tokens": 1050000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "responses",
"output_cost_per_token": 1.5e-05,
"output_cost_per_token_priority": 2.25e-05,
"supported_endpoints": [
"/v1/chat/completions",
"/v1/batch",
"/v1/responses"
],
"supported_modalities": [
"text",
"image"
],
"supported_output_modalities": [
"text"
],
"supports_function_calling": true,
"supports_native_streaming": true,
"supports_parallel_function_calling": true,
"supports_pdf_input": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_system_messages": true,
"supports_tool_choice": true,
"supports_service_tier": true,
"supports_vision": true
},
"gpt-5-pro": {
"input_cost_per_token": 1.5e-05,
"input_cost_per_token_batches": 7.5e-06,
Expand Down
72 changes: 72 additions & 0 deletions model_prices_and_context_window.json
Original file line number Diff line number Diff line change
Expand Up @@ -20810,6 +20810,78 @@
"supports_vision": true,
"supports_web_search": true
},
"gpt-5.4": {
"cache_read_input_token_cost": 2.5e-07,
"cache_read_input_token_cost_priority": 5e-07,
"input_cost_per_token": 2.5e-06,
"input_cost_per_token_priority": 5e-06,
"litellm_provider": "openai",
"max_input_tokens": 1050000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "responses",
"output_cost_per_token": 1.5e-05,
"output_cost_per_token_priority": 2.25e-05,
"supported_endpoints": [
"/v1/chat/completions",
"/v1/batch",
"/v1/responses"
],
"supported_modalities": [
"text",
"image"
],
"supported_output_modalities": [
"text"
],
"supports_function_calling": true,
"supports_native_streaming": true,
"supports_parallel_function_calling": true,
"supports_pdf_input": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_system_messages": true,
"supports_tool_choice": true,
"supports_service_tier": true,
"supports_vision": true
},
"gpt-5.4-2026-03-05": {
"cache_read_input_token_cost": 2.5e-07,
"cache_read_input_token_cost_priority": 5e-07,
"input_cost_per_token": 2.5e-06,
"input_cost_per_token_priority": 5e-06,
"litellm_provider": "openai",
"max_input_tokens": 1050000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "responses",
"output_cost_per_token": 1.5e-05,
"output_cost_per_token_priority": 2.25e-05,
"supported_endpoints": [
"/v1/chat/completions",
"/v1/batch",
"/v1/responses"
],
"supported_modalities": [
"text",
"image"
],
"supported_output_modalities": [
"text"
],
"supports_function_calling": true,
"supports_native_streaming": true,
"supports_parallel_function_calling": true,
"supports_pdf_input": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_system_messages": true,
"supports_tool_choice": true,
"supports_service_tier": true,
"supports_vision": true
},
"gpt-5-pro": {
"input_cost_per_token": 1.5e-05,
"input_cost_per_token_batches": 7.5e-06,
Expand Down
Loading