From 5f2a695fc857b2a5432124bebdb085d2e79c25c9 Mon Sep 17 00:00:00 2001 From: johngreek <2006605+jgreek@users.noreply.github.com> Date: Thu, 22 Jan 2026 22:13:40 -0500 Subject: [PATCH] [Fix] Anthropic models on Azure AI cache pricing (#19532) --- ...odel_prices_and_context_window_backup.json | 12 ++++++ model_prices_and_context_window.json | 12 ++++++ .../anthropic/test_azure_ai_cache_pricing.py | 42 +++++++++++++++++++ 3 files changed, 66 insertions(+) create mode 100644 tests/test_litellm/llms/anthropic/test_azure_ai_cache_pricing.py diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 470d598a25f..16a345247a4 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -1312,6 +1312,9 @@ "supports_function_calling": true }, "azure_ai/claude-haiku-4-5": { + "cache_creation_input_token_cost": 1.25e-06, + "cache_creation_input_token_cost_above_1hr": 2e-06, + "cache_read_input_token_cost": 1e-07, "input_cost_per_token": 1e-06, "litellm_provider": "azure_ai", "max_input_tokens": 200000, @@ -1330,6 +1333,9 @@ "supports_vision": true }, "azure_ai/claude-opus-4-5": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_creation_input_token_cost_above_1hr": 1e-05, + "cache_read_input_token_cost": 5e-07, "input_cost_per_token": 5e-06, "litellm_provider": "azure_ai", "max_input_tokens": 200000, @@ -1348,6 +1354,9 @@ "supports_vision": true }, "azure_ai/claude-opus-4-1": { + "cache_creation_input_token_cost": 1.875e-05, + "cache_creation_input_token_cost_above_1hr": 3e-05, + "cache_read_input_token_cost": 1.5e-06, "input_cost_per_token": 1.5e-05, "litellm_provider": "azure_ai", "max_input_tokens": 200000, @@ -1366,6 +1375,9 @@ "supports_vision": true }, "azure_ai/claude-sonnet-4-5": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_creation_input_token_cost_above_1hr": 6e-06, + "cache_read_input_token_cost": 3e-07, 
"input_cost_per_token": 3e-06, "litellm_provider": "azure_ai", "max_input_tokens": 200000, diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 470d598a25f..16a345247a4 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -1312,6 +1312,9 @@ "supports_function_calling": true }, "azure_ai/claude-haiku-4-5": { + "cache_creation_input_token_cost": 1.25e-06, + "cache_creation_input_token_cost_above_1hr": 2e-06, + "cache_read_input_token_cost": 1e-07, "input_cost_per_token": 1e-06, "litellm_provider": "azure_ai", "max_input_tokens": 200000, @@ -1330,6 +1333,9 @@ "supports_vision": true }, "azure_ai/claude-opus-4-5": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_creation_input_token_cost_above_1hr": 1e-05, + "cache_read_input_token_cost": 5e-07, "input_cost_per_token": 5e-06, "litellm_provider": "azure_ai", "max_input_tokens": 200000, @@ -1348,6 +1354,9 @@ "supports_vision": true }, "azure_ai/claude-opus-4-1": { + "cache_creation_input_token_cost": 1.875e-05, + "cache_creation_input_token_cost_above_1hr": 3e-05, + "cache_read_input_token_cost": 1.5e-06, "input_cost_per_token": 1.5e-05, "litellm_provider": "azure_ai", "max_input_tokens": 200000, @@ -1366,6 +1375,9 @@ "supports_vision": true }, "azure_ai/claude-sonnet-4-5": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_creation_input_token_cost_above_1hr": 6e-06, + "cache_read_input_token_cost": 3e-07, "input_cost_per_token": 3e-06, "litellm_provider": "azure_ai", "max_input_tokens": 200000, diff --git a/tests/test_litellm/llms/anthropic/test_azure_ai_cache_pricing.py b/tests/test_litellm/llms/anthropic/test_azure_ai_cache_pricing.py new file mode 100644 index 00000000000..45c988a21b8 --- /dev/null +++ b/tests/test_litellm/llms/anthropic/test_azure_ai_cache_pricing.py @@ -0,0 +1,42 @@ +""" +Test that Azure AI Anthropic models have cache pricing configured. +Verifies the fix for issue #19532. 
+"""
+
+import sys
+import os
+
+sys.path.insert(0, os.path.abspath("../../../../../"))  # relative path: resolved against the current working directory, not this file's location
+
+import litellm
+from litellm import get_model_info
+from litellm.litellm_core_utils.get_model_cost_map import get_model_cost_map
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def reload_model_costs():
+    """Reassign ``litellm.model_cost`` from ``get_model_cost_map(url=None)`` before every test in this module; the previous map is not restored."""
+    litellm.model_cost = get_model_cost_map(url=None)  # NOTE(review): url=None presumably selects the bundled JSON instead of a remote fetch - confirm
+    yield  # no teardown code follows, so the reassigned map stays in place after the test
+
+
+@pytest.mark.parametrize(
+    "model,expected_cache_creation_cost,expected_cache_read_cost",
+    [  # (model, cache-creation cost, cache-read cost); values mirror the azure_ai JSON entries added by this patch
+        ("claude-haiku-4-5", 1.25e-06, 1e-07),
+        ("claude-opus-4-5", 6.25e-06, 5e-07),
+        ("claude-opus-4-1", 1.875e-05, 1.5e-06),
+        ("claude-sonnet-4-5", 3.75e-06, 3e-07),
+    ],
+)
+def test_azure_ai_claude_cache_pricing(
+    model, expected_cache_creation_cost, expected_cache_read_cost
+):
+    """Check that ``get_model_info`` reports the parametrized cache-creation and cache-read token costs for an ``azure_ai`` Claude model."""
+    model_info = get_model_info(model=model, custom_llm_provider="azure_ai")
+
+    assert model_info.get("cache_creation_input_token_cost") is not None  # presence first, so an absent value fails here rather than in the equality check
+    assert model_info.get("cache_read_input_token_cost") is not None
+    assert model_info.get("cache_creation_input_token_cost") == expected_cache_creation_cost
+    assert model_info.get("cache_read_input_token_cost") == expected_cache_read_cost