Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -800,9 +800,10 @@ def _map_reasoning_effort_to_thinking_level(
GeminiThinkingConfig with thinkingLevel and includeThoughts
"""
# Check if this is gemini-3-flash which supports MINIMAL thinking level
# Covers gemini-3-flash, gemini-3-flash-preview, gemini-3.1-flash, gemini-3.1-flash-lite-preview, etc.
is_gemini3flash = model and (
"gemini-3-flash-preview" in model.lower()
or "gemini-3-flash" in model.lower()
"gemini-3-flash" in model.lower()
or "gemini-3.1-flash" in model.lower()
)
Comment on lines 804 to 807
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hardcoded model-name strings for feature detection

The is_gemini3flash flag is determined by checking for hardcoded substring patterns ("gemini-3-flash", "gemini-3.1-flash"). According to the project's custom rule, model-specific feature flags should not be hardcoded in the source; instead, they should be driven by a field in model_prices_and_context_window.json, read via get_model_info. With the current approach, every new Gemini flash model that supports the "minimal" thinking level (e.g. a hypothetical gemini-3.2-flash) requires another code change and a new LiteLLM release, rather than a simple JSON update.

The same anti-pattern already exists in _map_reasoning_effort_to_thinking_budget (lines ~745-754), but this PR extends it further. The suggested approach would be to add a flag such as supports_minimal_thinking_level to each relevant model entry in model_prices_and_context_window.json and then check that flag here, similar to how supports_reasoning is used elsewhere in the codebase.

# Example of data-driven approach (conceptual):
model_info = litellm.get_model_info(model=model, custom_llm_provider="gemini")
is_gemini3flash = bool(model_info.get("supports_minimal_thinking_level"))

Context Used: Rule from dashboard - What: Do not hardcode model-specific flags in the codebase. Instead, put them in model_prices_and_co... (source)

is_gemini31pro = model and (
"gemini-3.1-pro-preview" in model.lower()
Expand Down
43 changes: 43 additions & 0 deletions tests/llm_translation/test_gemini.py
Original file line number Diff line number Diff line change
Expand Up @@ -1495,6 +1495,49 @@ def test_anthropic_thinking_param_via_map_openai_params():
assert thinking_config_2["thinkingBudget"] == 10000


def test_gemini_31_flash_lite_reasoning_effort_minimal():
    """
    Verify that reasoning_effort='minimal' maps to thinkingLevel='minimal'
    (rather than 'low') for gemini-3.1-flash-lite-preview.

    Regression test for: "minimal" reasoning_effort not supported for gemini-3.1-flash-lite-preview
    """
    from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
        VertexGeminiConfig,
    )
    from litellm.types.utils import CallTypes
    from litellm.utils import return_raw_request

    # Direct unit-level check: "minimal" -> thinkingLevel "minimal" for this model.
    result = VertexGeminiConfig._map_reasoning_effort_to_thinking_level(
        reasoning_effort="minimal",
        model="gemini-3.1-flash-lite-preview",
    )
    assert result["thinkingLevel"] == "minimal", (
        f"Expected thinkingLevel='minimal' for gemini-3.1-flash-lite-preview, got '{result['thinkingLevel']}'"
    )
    assert result["includeThoughts"] is True

    # End-to-end check: the same mapping must survive the full map_openai_params
    # flow and land in the raw request's generationConfig.
    raw_request = return_raw_request(
        endpoint=CallTypes.completion,
        kwargs={
            "model": "gemini/gemini-3.1-flash-lite-preview",
            "messages": [{"role": "user", "content": "Hello"}],
            "reasoning_effort": "minimal",
        },
    )
    thinking_config = raw_request["raw_request_body"]["generationConfig"]["thinkingConfig"]
    assert thinking_config.get("thinkingLevel") == "minimal", (
        f"Expected thinkingLevel='minimal' via full flow, got {thinking_config}"
    )
    assert "thinkingBudget" not in thinking_config, (
        "gemini-3.1-flash-lite-preview should use thinkingLevel, not thinkingBudget"
    )


def test_gemini_image_size_limit_exceeded():
"""
Test that large images exceeding MAX_IMAGE_URL_DOWNLOAD_SIZE_MB are rejected.
Expand Down
Loading