-
-
Notifications
You must be signed in to change notification settings - Fork 6.6k
Add Priority PayGo cost tracking gemini/vertex ai #21909
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -480,6 +480,7 @@ def cost_per_token( # noqa: PLR0915 | |
| model=model_without_prefix, | ||
| custom_llm_provider=custom_llm_provider, | ||
| usage=usage_block, | ||
| service_tier=service_tier, | ||
| ) | ||
| elif custom_llm_provider == "anthropic": | ||
| return anthropic_cost_per_token(model=model, usage=usage_block) | ||
|
|
@@ -500,7 +501,9 @@ def cost_per_token( # noqa: PLR0915 | |
| model=model, usage=usage_block, response_time_ms=response_time_ms | ||
| ) | ||
| elif custom_llm_provider == "gemini": | ||
| return gemini_cost_per_token(model=model, usage=usage_block) | ||
| return gemini_cost_per_token( | ||
| model=model, usage=usage_block, service_tier=service_tier | ||
| ) | ||
| elif custom_llm_provider == "deepseek": | ||
| return deepseek_cost_per_token(model=model, usage=usage_block) | ||
| elif custom_llm_provider == "perplexity": | ||
|
|
@@ -704,6 +707,36 @@ def _get_response_model(completion_response: Any) -> Optional[str]: | |
| return None | ||
|
|
||
|
|
||
| _GEMINI_TRAFFIC_TYPE_TO_SERVICE_TIER: dict = { | ||
| # ON_DEMAND_PRIORITY maps to "priority" — selects input_cost_per_token_priority, etc. | ||
| "ON_DEMAND_PRIORITY": "priority", | ||
| # FLEX / BATCH maps to "flex" — selects input_cost_per_token_flex, etc. | ||
| "FLEX": "flex", | ||
| "BATCH": "flex", | ||
| # ON_DEMAND is standard pricing — no service_tier suffix applied | ||
| "ON_DEMAND": None, | ||
| } | ||
|
|
||
|
|
||
| def _map_traffic_type_to_service_tier(traffic_type: Optional[str]) -> Optional[str]: | ||
| """ | ||
| Map a Gemini usageMetadata.trafficType value to a LiteLLM service_tier string. | ||
|
|
||
| This allows the same `_priority` / `_flex` cost-key suffix logic used for | ||
| OpenAI/Azure to work for Gemini and Vertex AI models. | ||
|
|
||
| trafficType values seen in practice | ||
| ------------------------------------ | ||
| ON_DEMAND -> standard pricing (service_tier = None) | ||
| ON_DEMAND_PRIORITY -> priority pricing (service_tier = "priority") | ||
| FLEX / BATCH -> batch/flex pricing (service_tier = "flex") | ||
| """ | ||
| if traffic_type is None: | ||
| return None | ||
| service_tier = _GEMINI_TRAFFIC_TYPE_TO_SERVICE_TIER.get(traffic_type.upper()) | ||
| return service_tier | ||
|
Comment on lines
+710
to
+737
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Gemini-specific mapping outside `llms/`
This keeps Gemini/Vertex-specific logic in the shared cost-calculation module. Context Used: Rule from dashboard — What: Avoid writing provider-specific code outside of the `llms/` directory. Why: This practice ensur... (source) Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!
||
|
|
||
|
|
||
| def _get_usage_object( | ||
| completion_response: Any, | ||
| ) -> Optional[Usage]: | ||
|
|
@@ -1145,6 +1178,20 @@ def completion_cost( # noqa: PLR0915 | |
| "custom_llm_provider", custom_llm_provider or None | ||
| ) | ||
| region_name = hidden_params.get("region_name", region_name) | ||
|
|
||
| # For Gemini/Vertex AI responses, trafficType is stored in | ||
| # provider_specific_fields. Map it to the service_tier used | ||
| # by the cost key lookup (_priority / _flex suffixes) so that | ||
| # ON_DEMAND_PRIORITY requests are billed at priority prices. | ||
| if service_tier is None: | ||
| provider_specific = ( | ||
| hidden_params.get("provider_specific_fields") or {} | ||
| ) | ||
| raw_traffic_type = provider_specific.get("traffic_type") | ||
| if raw_traffic_type: | ||
| service_tier = _map_traffic_type_to_service_tier( | ||
| raw_traffic_type | ||
| ) | ||
| else: | ||
| if model is None: | ||
| raise ValueError( | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Gemini-specific mapping placed outside `llms/`

The `_GEMINI_TRAFFIC_TYPE_TO_SERVICE_TIER` dictionary and `_map_traffic_type_to_service_tier` function are Gemini/Vertex AI-specific concepts (`ON_DEMAND`, `ON_DEMAND_PRIORITY`, `FLEX`, and `BATCH` are Gemini traffic types). Per repository conventions, provider-specific code should live inside the `llms/` directory (e.g., `litellm/llms/gemini/cost_calculator.py` or `litellm/llms/vertex_ai/cost_calculator.py`).

Consider moving this mapping to `litellm/llms/gemini/cost_calculator.py` or a shared Gemini/Vertex utility, and importing it here.

Context Used: Rule from dashboard — What: Avoid writing provider-specific code outside of the `llms/` directory. Why: This practice ensur... (source)

Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!