Merged
14 changes: 14 additions & 0 deletions docs/my-website/docs/providers/vertex.md
@@ -1687,6 +1687,20 @@ litellm.vertex_location = "us-central1 # Your Location
| gemini-2.5-flash-lite-preview-09-2025 | `completion('gemini-2.5-flash-lite-preview-09-2025', messages)`, `completion('vertex_ai/gemini-2.5-flash-lite-preview-09-2025', messages)` |
| gemini-3.1-flash-lite-preview | `completion('gemini-3.1-flash-lite-preview', messages)`, `completion('vertex_ai/gemini-3.1-flash-lite-preview', messages)` |

## PayGo / Priority Cost Tracking

LiteLLM automatically tracks spend for Vertex AI Gemini models using the correct pricing tier based on the response's `usageMetadata.trafficType`:

| Vertex AI `trafficType` | LiteLLM `service_tier` | Pricing applied |
|-------------------------|-------------------------|-----------------|
| `ON_DEMAND_PRIORITY` | `priority` | PayGo / priority pricing (`input_cost_per_token_priority`, `output_cost_per_token_priority`) |
| `ON_DEMAND` | standard | Default on-demand pricing |
| `FLEX` / `BATCH` | `flex` | Batch/flex pricing |

When you use [Vertex AI PayGo](https://cloud.google.com/vertex-ai/generative-ai/pricing) (on-demand priority) or batch workloads, LiteLLM reads `trafficType` from the response and applies the matching cost per token from the [model cost map](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json). No configuration is required — spend tracking works out of the box for both standard and PayGo requests.
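The `trafficType` → pricing-key mapping above can be sketched as a small lookup (an illustrative sketch mirroring the table, not LiteLLM's actual implementation):

```python
# Illustrative mapping from Vertex AI trafficType to the cost-map key suffix.
# This mirrors the table above; LiteLLM's internal cost logic may differ in detail.
TRAFFIC_TYPE_TO_TIER = {
    "ON_DEMAND_PRIORITY": "priority",
    "ON_DEMAND": None,  # standard on-demand pricing (no tier suffix)
    "FLEX": "flex",
    "BATCH": "flex",
}


def pricing_keys(traffic_type: str):
    """Return the (input, output) cost-map keys for a response's trafficType."""
    tier = TRAFFIC_TYPE_TO_TIER.get(traffic_type)
    if tier is None:
        return "input_cost_per_token", "output_cost_per_token"
    return f"input_cost_per_token_{tier}", f"output_cost_per_token_{tier}"
```

For example, `pricing_keys("ON_DEMAND_PRIORITY")` yields the `*_priority` keys from the model cost map, while an unrecognized `trafficType` falls back to standard pricing.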

See [Spend Tracking](../proxy/cost_tracking.md) for general cost tracking setup.

## Private Service Connect (PSC) Endpoints

LiteLLM supports Vertex AI models deployed to Private Service Connect (PSC) endpoints, allowing you to use custom `api_base` URLs for private deployments.
2 changes: 2 additions & 0 deletions docs/my-website/docs/proxy/cost_tracking.md
@@ -8,6 +8,8 @@ Track spend for keys, users, and teams across 100+ LLMs.

LiteLLM automatically tracks spend for all known models. See our [model cost map](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json)

Provider-specific cost tracking (e.g., [Vertex AI PayGo / priority pricing](../providers/vertex.md#paygo--priority-cost-tracking), [Bedrock service tiers](../providers/bedrock.md#usage---service-tier), [Azure base model mapping](./custom_pricing.md#set-base_model-for-cost-tracking-eg-azure-deployments)) is applied automatically when the response includes tier metadata.

:::tip Keep Pricing Data Updated
[Sync model pricing data from GitHub](./sync_models_github.md) to ensure accurate cost tracking.
:::
9 changes: 9 additions & 0 deletions docs/my-website/docs/proxy/custom_pricing.md
@@ -104,9 +104,18 @@ There are other keys you can use to specify costs for different scenarios and mo
- `input_cost_per_video_per_second` - Cost per second of video input
- `input_cost_per_video_per_second_above_128k_tokens` - Video cost for large contexts
- `input_cost_per_character` - Character-based pricing for some providers
- `input_cost_per_token_priority` / `output_cost_per_token_priority` - Priority/PayGo pricing (Vertex AI Gemini, Bedrock)
- `input_cost_per_token_flex` / `output_cost_per_token_flex` - Batch/flex pricing

These keys evolve based on how new models handle multimodality. The latest version can be found at [https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json).

### Service Tier / PayGo Pricing (Vertex AI, Bedrock)

For providers that support multiple pricing tiers (e.g., Vertex AI PayGo, Bedrock service tiers), LiteLLM automatically applies the correct cost based on the response:

- **Vertex AI Gemini**: Uses `usageMetadata.trafficType` (`ON_DEMAND_PRIORITY` → priority, `FLEX`/`BATCH` → flex). See [Vertex AI - PayGo / Priority Cost Tracking](../providers/vertex.md#paygo--priority-cost-tracking).
- **Bedrock**: Uses `serviceTier` from the response. See [Bedrock - Usage - Service Tier](../providers/bedrock.md#usage---service-tier).
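The way tiered keys interact with the defaults can be sketched as follows (the entry values are made up for illustration, and the fall-back-to-standard behavior is an assumption, not a statement about LiteLLM's internals):

```python
# Hypothetical cost-map entry; per-token prices are illustrative, not real.
entry = {
    "input_cost_per_token": 3e-7,
    "output_cost_per_token": 1.2e-6,
    "input_cost_per_token_priority": 6e-7,
    "output_cost_per_token_priority": 2.4e-6,
}


def request_cost(entry, prompt_tokens, completion_tokens, tier=None):
    """Compute spend for one request, preferring tiered keys when present."""
    suffix = f"_{tier}" if tier else ""
    input_rate = entry.get(f"input_cost_per_token{suffix}", entry["input_cost_per_token"])
    output_rate = entry.get(f"output_cost_per_token{suffix}", entry["output_cost_per_token"])
    return prompt_tokens * input_rate + completion_tokens * output_rate
```

With this entry, a priority-tier request costs twice the standard rate, and a tier whose keys are absent (e.g. `flex` here) falls back to standard pricing.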

## Zero-Cost Models (Bypass Budget Checks)

**Use Case**: You have on-premises or free models that should be accessible even when users exceed their budget limits.
5 changes: 5 additions & 0 deletions litellm/constants.py
@@ -1242,6 +1242,11 @@
LITELLM_METADATA_FIELD = "litellm_metadata"
OLD_LITELLM_METADATA_FIELD = "metadata"
LITELLM_TRUNCATED_PAYLOAD_FIELD = "litellm_truncated"
LITELLM_TRUNCATION_DB_SAFEGUARD_NOTE = (
    "Truncation is a DB storage safeguard. "
    "Full, untruncated data is logged to logging callbacks (OTEL, Datadog, etc.). "
    "To increase the truncation limit, set `MAX_STRING_LENGTH_PROMPT_IN_DB` in your env."
)

########################### LiteLLM Proxy Specific Constants ###########################
########################################################################################
11 changes: 10 additions & 1 deletion litellm/litellm_core_utils/audio_utils/utils.py
@@ -263,7 +263,16 @@ def calculate_request_duration(file: FileTypes) -> Optional[float]:
# Extract duration using soundfile
file_object = io.BytesIO(file_content)
with sf.SoundFile(file_object) as audio:
- duration = len(audio) / audio.samplerate
+ frames = len(audio)
+ # Guard against sentinel/invalid frame counts (e.g., 2^63-1 from libsndfile)
+ if frames <= 0 or frames >= 2**63 - 1:
+     return None
+ if audio.samplerate <= 0:
+     return None
+ duration = frames / audio.samplerate
+ # Reject implausible durations (> 24 hours)
+ if duration > 86400:
+     return None
return duration

except Exception:
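The guard added above can be exercised in isolation (a sketch that mirrors the diff; the `2**63 - 1` sentinel and 24-hour cap are taken directly from it, the function name is hypothetical):

```python
SENTINEL_MAX_FRAMES = 2**63 - 1  # frame count libsndfile may report when length is unknown
MAX_DURATION_SECONDS = 86400  # reject implausible durations (> 24 hours)


def safe_duration(frames, samplerate):
    """Return duration in seconds, or None for sentinel/invalid inputs."""
    if frames <= 0 or frames >= SENTINEL_MAX_FRAMES:
        return None
    if samplerate <= 0:
        return None
    duration = frames / samplerate
    if duration > MAX_DURATION_SECONDS:
        return None
    return duration
```

A 10-second clip at 44.1 kHz (441,000 frames) passes through, while the libsndfile sentinel, zero frames, or a zero sample rate all return `None`.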
@@ -45,6 +45,20 @@

from .get_headers import get_response_headers

def _normalize_images(
    images: Optional[List[Dict[str, object]]],
) -> Optional[List[Dict[str, object]]]:
    """Normalize image items to include required 'index' field if missing."""
    if images is None:
        return None
    normalized: List[Dict[str, object]] = []
    for i, img in enumerate(images):
        if isinstance(img, dict) and "index" not in img:
            img = {**img, "index": i}
        normalized.append(img)
    return normalized

Review comment (Contributor): Missing blank line before top-level function (PEP 8 E302). PEP 8 requires two blank lines before a top-level function definition, but only one blank line separates the `from .get_headers import get_response_headers` import from `_normalize_images`, which will trigger an E302 lint error. Suggested fix: add one more blank line before the function definition.


_MESSAGE_FIELDS: frozenset = frozenset(Message.model_fields.keys())
_CHOICES_FIELDS: frozenset = frozenset(Choices.model_fields.keys())
_MODEL_RESPONSE_FIELDS: frozenset = frozenset(ModelResponse.model_fields.keys()) | {
@@ -591,7 +605,7 @@ def convert_to_model_response_object( # noqa: PLR0915
reasoning_content=reasoning_content,
thinking_blocks=thinking_blocks,
annotations=choice["message"].get("annotations", None),
- images=choice["message"].get("images", None),
+ images=_normalize_images(choice["message"].get("images", None)),
)
finish_reason = choice.get("finish_reason", None)
if finish_reason is None:
4 changes: 2 additions & 2 deletions litellm/proxy/management_endpoints/organization_endpoints.py
@@ -649,8 +649,8 @@ async def list_organization(
"mode": "insensitive", # Case-insensitive search
}

- # if proxy admin - get all orgs (with optional filters)
- if user_api_key_dict.user_role == LitellmUserRoles.PROXY_ADMIN:
+ # if proxy admin or admin viewer - get all orgs (with optional filters)
+ if _user_has_admin_view(user_api_key_dict):
response = await prisma_client.db.litellm_organizationtable.find_many(
where=where_conditions if where_conditions else None,
include={"litellm_budget_table": True, "members": True, "teams": True},
9 changes: 6 additions & 3 deletions litellm/proxy/proxy_server.py
@@ -372,6 +372,9 @@ def generate_feedback_box():
from litellm.proxy.management_endpoints.internal_user_endpoints import (
user_update,
)
from litellm.proxy.management_endpoints.jwt_key_mapping_endpoints import (
router as jwt_key_mapping_router,
)
from litellm.proxy.management_endpoints.key_management_endpoints import (
delete_verification_tokens,
duration_in_seconds,
@@ -380,9 +383,6 @@
from litellm.proxy.management_endpoints.key_management_endpoints import (
router as key_management_router,
)
from litellm.proxy.management_endpoints.jwt_key_mapping_endpoints import (
router as jwt_key_mapping_router,
)
from litellm.proxy.management_endpoints.mcp_management_endpoints import (
router as mcp_management_router,
)
@@ -661,6 +661,9 @@ def generate_feedback_box():

ui_message += f"\n\n🔎 [```LiteLLM Model Hub```]({model_hub_link}). See available models on the proxy. [**Docs**](https://docs.litellm.ai/docs/proxy/ai_hub)"

chat_link = f"{server_root_path}/ui/chat"
ui_message += f"\n\n💬 [```LiteLLM Chat UI```]({chat_link}). ChatGPT-like interface for your users to chat with AI models and MCP tools."

custom_swagger_message = "[**Customize Swagger Docs**](https://docs.litellm.ai/docs/proxy/enterprise#swagger-docs---custom-routes--branding)"

### CUSTOM BRANDING [ENTERPRISE FEATURE] ###
28 changes: 24 additions & 4 deletions litellm/proxy/spend_tracking/spend_tracking_utils.py
@@ -11,6 +11,10 @@

import litellm
from litellm._logging import verbose_proxy_logger
from litellm.constants import (
    LITELLM_TRUNCATED_PAYLOAD_FIELD,
    LITELLM_TRUNCATION_DB_SAFEGUARD_NOTE,
)
from litellm.constants import \
MAX_STRING_LENGTH_PROMPT_IN_DB as DEFAULT_MAX_STRING_LENGTH_PROMPT_IN_DB
from litellm.constants import REDACTED_BY_LITELM_STRING
Expand Down Expand Up @@ -628,7 +632,10 @@ def _sanitize_request_body_for_spend_logs_payload(
Recursively sanitize request body to prevent logging large base64 strings or other large values.
Truncates strings longer than MAX_STRING_LENGTH_PROMPT_IN_DB characters and handles nested dictionaries.
"""
from litellm.constants import LITELLM_TRUNCATED_PAYLOAD_FIELD
from litellm.constants import (
LITELLM_TRUNCATED_PAYLOAD_FIELD,
LITELLM_TRUNCATION_DB_SAFEGUARD_NOTE,
)

if visited is None:
visited = set()
@@ -674,7 +681,8 @@ def _sanitize_value(value: Any) -> Any:
# Build the truncated string: beginning + truncation marker + end
truncated_value = (
f"{value[:start_chars]}"
- f"... ({LITELLM_TRUNCATED_PAYLOAD_FIELD} skipped {skipped_chars} chars) ..."
+ f"... ({LITELLM_TRUNCATED_PAYLOAD_FIELD} skipped {skipped_chars} chars. "
+ f"{LITELLM_TRUNCATION_DB_SAFEGUARD_NOTE}) ..."
f"{value[-end_chars:]}"
)
return truncated_value
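Standalone, the head-and-tail truncation scheme looks roughly like this (illustrative limit and helper name; the proxy's real limit comes from `MAX_STRING_LENGTH_PROMPT_IN_DB`, and the exact head/tail split may differ):

```python
LIMIT = 20  # illustrative; the proxy reads MAX_STRING_LENGTH_PROMPT_IN_DB from env


def truncate_for_db(value, marker="litellm_truncated"):
    """Keep the head and tail of an oversized string, noting skipped chars."""
    if len(value) <= LIMIT:
        return value
    start_chars = LIMIT // 2
    end_chars = LIMIT - start_chars
    skipped_chars = len(value) - (start_chars + end_chars)
    return (
        f"{value[:start_chars]}"
        f"... ({marker} skipped {skipped_chars} chars) ..."
        f"{value[-end_chars:]}"
    )
```

Short strings pass through untouched; long ones keep their first and last characters around a marker that records how much was dropped.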
@@ -791,6 +799,11 @@ def _get_proxy_server_request_for_spend_logs_payload(

_request_body = _sanitize_request_body_for_spend_logs_payload(_request_body)
_request_body_json_str = json.dumps(_request_body, default=str)
if LITELLM_TRUNCATED_PAYLOAD_FIELD in _request_body_json_str:
verbose_proxy_logger.info(
"Spend Log: request body was truncated before storing in DB. %s",
LITELLM_TRUNCATION_DB_SAFEGUARD_NOTE,
)
return _request_body_json_str
return "{}"

@@ -866,8 +879,15 @@ def _get_response_for_spend_logs_payload(
if sanitized_response is None:
return "{}"
if isinstance(sanitized_response, str):
- return sanitized_response
- return safe_dumps(sanitized_response)
+ result_str = sanitized_response
+ else:
+     result_str = safe_dumps(sanitized_response)
+ if LITELLM_TRUNCATED_PAYLOAD_FIELD in result_str:
+     verbose_proxy_logger.info(
+         "Spend Log: response was truncated before storing in DB. %s",
+         LITELLM_TRUNCATION_DB_SAFEGUARD_NOTE,
+     )
+ return result_str
return "{}"


@@ -16,7 +16,11 @@
from unittest.mock import AsyncMock, MagicMock, patch

import litellm
from litellm.constants import LITELLM_TRUNCATED_PAYLOAD_FIELD, REDACTED_BY_LITELM_STRING
from litellm.constants import (
    LITELLM_TRUNCATED_PAYLOAD_FIELD,
    LITELLM_TRUNCATION_DB_SAFEGUARD_NOTE,
    REDACTED_BY_LITELM_STRING,
)
from litellm.litellm_core_utils.safe_json_dumps import safe_dumps
from litellm.proxy.spend_tracking.spend_tracking_utils import (
_get_messages_for_spend_logs_payload,
@@ -60,7 +64,7 @@ def test_sanitize_request_body_for_spend_logs_payload_long_string():
end_chars = MAX_STRING_LENGTH_PROMPT_IN_DB - start_chars

skipped_chars = len(long_string) - (start_chars + end_chars)
expected_truncation_message = f"... ({LITELLM_TRUNCATED_PAYLOAD_FIELD} skipped {skipped_chars} chars) ..."
expected_truncation_message = f"... ({LITELLM_TRUNCATED_PAYLOAD_FIELD} skipped {skipped_chars} chars. {LITELLM_TRUNCATION_DB_SAFEGUARD_NOTE}) ..."
expected_length = start_chars + len(expected_truncation_message) + end_chars

assert len(sanitized["text"]) == expected_length
@@ -86,7 +90,7 @@ def test_sanitize_request_body_for_spend_logs_payload_nested_dict():
end_chars = MAX_STRING_LENGTH_PROMPT_IN_DB - start_chars

skipped_chars = len(long_string) - total_keep
expected_truncation_message = f"... ({LITELLM_TRUNCATED_PAYLOAD_FIELD} skipped {skipped_chars} chars) ..."
expected_truncation_message = f"... ({LITELLM_TRUNCATED_PAYLOAD_FIELD} skipped {skipped_chars} chars. {LITELLM_TRUNCATION_DB_SAFEGUARD_NOTE}) ..."
expected_length = start_chars + len(expected_truncation_message) + end_chars

assert len(sanitized["outer"]["inner"]["text"]) == expected_length
@@ -111,7 +115,7 @@ def test_sanitize_request_body_for_spend_logs_payload_nested_list():
end_chars = MAX_STRING_LENGTH_PROMPT_IN_DB - start_chars

skipped_chars = len(long_string) - total_keep
expected_truncation_message = f"... ({LITELLM_TRUNCATED_PAYLOAD_FIELD} skipped {skipped_chars} chars) ..."
expected_truncation_message = f"... ({LITELLM_TRUNCATED_PAYLOAD_FIELD} skipped {skipped_chars} chars. {LITELLM_TRUNCATION_DB_SAFEGUARD_NOTE}) ..."
expected_length = start_chars + len(expected_truncation_message) + end_chars

assert len(sanitized["items"][0]["text"]) == expected_length
@@ -151,7 +155,7 @@ def test_sanitize_request_body_for_spend_logs_payload_mixed_types():
end_chars = MAX_STRING_LENGTH_PROMPT_IN_DB - start_chars

skipped_chars = len(long_string) - total_keep
expected_truncation_message = f"... ({LITELLM_TRUNCATED_PAYLOAD_FIELD} skipped {skipped_chars} chars) ..."
expected_truncation_message = f"... ({LITELLM_TRUNCATED_PAYLOAD_FIELD} skipped {skipped_chars} chars. {LITELLM_TRUNCATION_DB_SAFEGUARD_NOTE}) ..."
expected_length = start_chars + len(expected_truncation_message) + end_chars

assert len(sanitized["text"]) == expected_length
@@ -396,6 +400,78 @@ def test_get_response_for_spend_logs_payload_truncates_large_embedding(mock_should_store):
assert parsed["data"][0]["other_field"] == "value"


def test_truncation_includes_db_safeguard_note():
"""
Test that truncated content includes the DB safeguard note explaining
that full data is available in OTEL/other logging integrations.
"""
from litellm.constants import MAX_STRING_LENGTH_PROMPT_IN_DB

large_error = "Error: " + "x" * (MAX_STRING_LENGTH_PROMPT_IN_DB + 1000)
request_body = {"error_trace": large_error}
sanitized = _sanitize_request_body_for_spend_logs_payload(request_body)

truncated = sanitized["error_trace"]
assert LITELLM_TRUNCATED_PAYLOAD_FIELD in truncated
assert LITELLM_TRUNCATION_DB_SAFEGUARD_NOTE in truncated
assert "DB storage safeguard" in truncated
assert "logging callbacks" in truncated


@patch(
"litellm.proxy.spend_tracking.spend_tracking_utils._should_store_prompts_and_responses_in_spend_logs"
)
def test_response_truncation_logs_info_message(mock_should_store):
"""
Test that when response is truncated before DB storage, an info log is emitted
noting that full data is available in OTEL/other integrations.
"""
from litellm.constants import MAX_STRING_LENGTH_PROMPT_IN_DB

mock_should_store.return_value = True
large_text = "B" * (MAX_STRING_LENGTH_PROMPT_IN_DB + 500)
payload = cast(
StandardLoggingPayload,
{"response": {"data": [{"content": large_text}]}},
)

with patch(
"litellm.proxy.spend_tracking.spend_tracking_utils.verbose_proxy_logger"
) as mock_logger:
_get_response_for_spend_logs_payload(payload)
mock_logger.info.assert_called_once()
log_msg = mock_logger.info.call_args[0][0]
assert "response was truncated" in log_msg


@patch(
"litellm.proxy.spend_tracking.spend_tracking_utils._should_store_prompts_and_responses_in_spend_logs"
)
def test_request_body_truncation_logs_info_message(mock_should_store):
"""
Test that when request body is truncated before DB storage, an info log is emitted.
"""
from litellm.constants import MAX_STRING_LENGTH_PROMPT_IN_DB

mock_should_store.return_value = True
large_prompt = "C" * (MAX_STRING_LENGTH_PROMPT_IN_DB + 500)
litellm_params = {
"proxy_server_request": {
"body": {"messages": [{"role": "user", "content": large_prompt}]}
}
}

with patch(
"litellm.proxy.spend_tracking.spend_tracking_utils.verbose_proxy_logger"
) as mock_logger:
_get_proxy_server_request_for_spend_logs_payload(
metadata={}, litellm_params=litellm_params, kwargs={}
)
mock_logger.info.assert_called_once()
log_msg = mock_logger.info.call_args[0][0]
assert "request body was truncated" in log_msg


def test_safe_dumps_handles_circular_references():
"""Test that safe_dumps can handle circular references without raising exceptions"""

Expand Down