Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 14 additions & 25 deletions litellm/llms/azure/cost_calculation.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
"""
Helper util for handling azure openai-specific cost calculation
- e.g.: prompt caching
- e.g.: prompt caching, audio tokens
"""

from typing import Optional, Tuple

from litellm._logging import verbose_logger
from litellm.litellm_core_utils.llm_cost_calc.utils import generic_cost_per_token
from litellm.types.utils import Usage
from litellm.utils import get_model_info

Expand All @@ -18,34 +19,15 @@ def cost_per_token(

Input:
- model: str, the model name without provider prefix
- usage: LiteLLM Usage block, containing anthropic caching information
- usage: LiteLLM Usage block, containing caching and audio token information

Returns:
Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
"""
## GET MODEL INFO
model_info = get_model_info(model=model, custom_llm_provider="azure")
cached_tokens: Optional[int] = None
## CALCULATE INPUT COST
non_cached_text_tokens = usage.prompt_tokens
if usage.prompt_tokens_details and usage.prompt_tokens_details.cached_tokens:
cached_tokens = usage.prompt_tokens_details.cached_tokens
non_cached_text_tokens = non_cached_text_tokens - cached_tokens
prompt_cost: float = non_cached_text_tokens * model_info["input_cost_per_token"]

## CALCULATE OUTPUT COST
completion_cost: float = (
usage["completion_tokens"] * model_info["output_cost_per_token"]
)

## Prompt Caching cost calculation
if model_info.get("cache_read_input_token_cost") is not None and cached_tokens:
# Note: We read ._cache_read_input_tokens from the Usage - since cost_calculator.py standardizes the cache read tokens on usage._cache_read_input_tokens
prompt_cost += cached_tokens * (
model_info.get("cache_read_input_token_cost", 0) or 0
)

## Speech / Audio cost calculation
## Speech / Audio cost calculation (cost per second for TTS models)
if (
"output_cost_per_second" in model_info
and model_info["output_cost_per_second"] is not None
Expand All @@ -55,7 +37,14 @@ def cost_per_token(
f"For model={model} - output_cost_per_second: {model_info.get('output_cost_per_second')}; response time: {response_time_ms}"
)
## COST PER SECOND ##
prompt_cost = 0
prompt_cost = 0.0
completion_cost = model_info["output_cost_per_second"] * response_time_ms / 1000

return prompt_cost, completion_cost
return prompt_cost, completion_cost

## Use generic cost calculator for all other cases
## This properly handles: text tokens, audio tokens, cached tokens, reasoning tokens, etc.
return generic_cost_per_token(
model=model,
usage=usage,
custom_llm_provider="azure",
)
84 changes: 84 additions & 0 deletions tests/test_litellm/test_cost_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,90 @@ def test_azure_realtime_cost_calculator():
assert cost > 0


def test_azure_audio_output_cost_calculation():
    """
    Verify Azure audio models bill output audio tokens at the audio rate.

    Regression test for https://github.com/BerriAI/litellm/issues/19764:
    completion audio tokens must be priced with output_cost_per_audio_token,
    not lumped in with text tokens at output_cost_per_token.
    """
    from litellm.types.utils import (
        Choices,
        CompletionTokensDetailsWrapper,
        Message,
    )

    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    # Token counts reproduced from issue #19764:
    #   prompt: 17 text tokens, no audio, no cache hits
    #   completion: 110 text tokens + 482 audio tokens = 592 total
    text_out = 110
    audio_out = 482
    usage_object = Usage(
        prompt_tokens=17,
        completion_tokens=text_out + audio_out,
        total_tokens=609,
        prompt_tokens_details=PromptTokensDetailsWrapper(
            audio_tokens=0,
            cached_tokens=0,
            text_tokens=17,
            image_tokens=0,
        ),
        completion_tokens_details=CompletionTokensDetailsWrapper(
            audio_tokens=audio_out,
            reasoning_tokens=0,
            text_tokens=text_out,
        ),
    )

    response = ModelResponse(
        id="test-azure-audio-cost",
        choices=[
            Choices(
                finish_reason="stop",
                index=0,
                message=Message(
                    content="Test response",
                    role="assistant",
                ),
            )
        ],
        created=1729282652,
        model="azure/gpt-audio-2025-08-28",
        object="chat.completion",
        usage=usage_object,
    )

    cost = completion_cost(response, model="azure/gpt-audio-2025-08-28")

    model_info = litellm.get_model_info("azure/gpt-audio-2025-08-28")

    # Correct pricing: text and audio output tokens each at their own rate.
    expected_input_cost = model_info["input_cost_per_token"] * 17
    expected_output_cost = (
        model_info["output_cost_per_token"] * text_out
        + model_info["output_cost_per_audio_token"] * audio_out
    )
    expected_total_cost = expected_input_cost + expected_output_cost

    # Buggy pricing (pre-fix): every completion token billed at the text rate.
    wrong_output_cost = model_info["output_cost_per_token"] * (text_out + audio_out)
    wrong_total_cost = expected_input_cost + wrong_output_cost

    # The computed cost must move away from the all-text pricing...
    assert abs(cost - wrong_total_cost) > 0.001, (
        "Bug: Audio tokens are being charged at text token rate"
    )

    # ...and match the mixed text/audio pricing exactly.
    assert abs(cost - expected_total_cost) < 0.0000001, (
        f"Expected cost {expected_total_cost}, got {cost}"
    )


def test_default_image_cost_calculator(monkeypatch):
from litellm.cost_calculator import default_image_cost_calculator

Expand Down
Loading