Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
188 changes: 169 additions & 19 deletions litellm/proxy/proxy_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -7706,6 +7706,154 @@ def _enrich_model_info_with_litellm_data(
return model


async def _apply_search_filter_to_models(
all_models: List[Dict[str, Any]],
search: str,
page: int,
size: int,
prisma_client: Optional[Any],
proxy_config: Any,
) -> Tuple[List[Dict[str, Any]], Optional[int]]:
"""
Apply search filter to models, querying database for additional matching models.

Args:
all_models: List of models to filter
search: Search term (case-insensitive)
page: Current page number
size: Page size
prisma_client: Prisma client for database queries
proxy_config: Proxy config for decrypting models

Returns:
Tuple of (filtered_models, total_count). total_count is None if not searching.
"""
if not search or not search.strip():
return all_models, None

search_lower = search.lower().strip()

# Filter models in router by search term
filtered_router_models = [
m for m in all_models
if search_lower in m.get("model_name", "").lower()
]

# Separate filtered models into config vs db models, and track db model IDs
filtered_config_models = []
db_model_ids_in_router = set()

for m in filtered_router_models:
model_info = m.get("model_info", {})
is_db_model = model_info.get("db_model", False)
model_id = model_info.get("id")

if is_db_model and model_id:
db_model_ids_in_router.add(model_id)
else:
filtered_config_models.append(m)

config_models_count = len(filtered_config_models)
db_models_in_router_count = len(db_model_ids_in_router)
router_models_count = config_models_count + db_models_in_router_count

# Query database for additional models with search term
db_models = []
db_models_total_count = 0
models_needed_for_page = size * page

try:
# Build where condition for database query
db_where_condition: Dict[str, Any] = {
"model_name": {
"contains": search_lower,
"mode": "insensitive",
}
}
# Exclude models already in router if we have any
if db_model_ids_in_router:
db_where_condition["model_id"] = {
"not": {"in": list(db_model_ids_in_router)}
}

# Get total count of matching database models
db_models_total_count = await prisma_client.db.litellm_proxymodeltable.count(
where=db_where_condition
)

# Calculate total count for search results
search_total_count = router_models_count + db_models_total_count

# Fetch database models if we need more for the current page
if router_models_count < models_needed_for_page:
models_to_fetch = min(
models_needed_for_page - router_models_count,
db_models_total_count
)

if models_to_fetch > 0:
db_models_raw = await prisma_client.db.litellm_proxymodeltable.find_many(
where=db_where_condition,
take=models_to_fetch,
)

# Convert database models to router format
for db_model in db_models_raw:
decrypted_models = proxy_config.decrypt_model_list_from_db([db_model])
if decrypted_models:
db_models.extend(decrypted_models)
except Exception as e:
verbose_proxy_logger.exception(
f"Error querying database models with search: {str(e)}"
)
# If error, use router models count as fallback
search_total_count = router_models_count

# Combine all models
filtered_models = filtered_router_models + db_models
return filtered_models, search_total_count


def _paginate_models_response(
all_models: List[Dict[str, Any]],
page: int,
size: int,
total_count: Optional[int],
search: Optional[str],
) -> Dict[str, Any]:
"""
Paginate models and return response dictionary.

Args:
all_models: List of all models
page: Current page number
size: Page size
total_count: Total count (if None, uses len(all_models))
search: Search term (for logging)

Returns:
Paginated response dictionary
"""
if total_count is None:
total_count = len(all_models)

skip = (page - 1) * size
total_pages = -(-total_count // size) if total_count > 0 else 0
paginated_models = all_models[skip : skip + size]

verbose_proxy_logger.debug(
f"Pagination: skip={skip}, take={size}, total_count={total_count}, total_pages={total_pages}, search={search}"
)

return {
"data": paginated_models,
"total_count": total_count,
"current_page": page,
"total_pages": total_pages,
"size": size,
}


@router.get(
"/v2/model/info",
description="v2 - returns models available to the user based on their API key permissions. Shows model info from config.yaml (except api key and api base). Filter to just user-added models with ?user_models_only=true",
Expand All @@ -7727,6 +7875,9 @@ async def model_info_v2(
debug: Optional[bool] = False,
page: int = Query(1, description="Page number", ge=1),
size: int = Query(50, description="Page size", ge=1),
search: Optional[str] = fastapi.Query(
None, description="Search model names (case-insensitive partial match)"
),
):
"""
BETA ENDPOINT. Might change unexpectedly. Use `/v1/model/info` for now.
Expand Down Expand Up @@ -7760,6 +7911,16 @@ async def model_info_v2(
if model is not None:
all_models = [m for m in all_models if m["model_name"] == model]

# Apply search filter if provided
all_models, search_total_count = await _apply_search_filter_to_models(
all_models=all_models,
search=search or "",
page=page,
size=size,
prisma_client=prisma_client,
proxy_config=proxy_config,
)

if user_models_only:
all_models = await non_admin_all_models(
all_models=all_models,
Expand All @@ -7775,33 +7936,22 @@ async def model_info_v2(
llm_router=llm_router,
all_models=all_models,
)
# fill in model info based on config.yaml and litellm model_prices_and_context_window.json

# Fill in model info based on config.yaml and litellm model_prices_and_context_window.json
for i, _model in enumerate(all_models):
all_models[i] = _enrich_model_info_with_litellm_data(
model=_model, debug=debug if debug is not None else False, llm_router=llm_router
)

verbose_proxy_logger.debug("all_models: %s", all_models)

total_count = len(all_models)

skip = (page - 1) * size

total_pages = -(-total_count // size) if total_count > 0 else 0

paginated_models = all_models[skip : skip + size]

verbose_proxy_logger.debug(
f"Pagination: skip={skip}, take={size}, total_count={total_count}, total_pages={total_pages}"
return _paginate_models_response(
all_models=all_models,
page=page,
size=size,
total_count=search_total_count,
search=search,
)

return {
"data": paginated_models,
"total_count": total_count,
"current_page": page,
"total_pages": total_pages,
"size": size,
}


@router.get(
Expand Down
Loading
Loading