diff --git a/docs/my-website/docs/completion/knowledgebase.md b/docs/my-website/docs/completion/knowledgebase.md index ee0e30867853..e772f4fe9556 100644 --- a/docs/my-website/docs/completion/knowledgebase.md +++ b/docs/my-website/docs/completion/knowledgebase.md @@ -412,6 +412,219 @@ This is sent to: `https://bedrock-agent-runtime.{aws_region}.amazonaws.com/knowl This process happens automatically whenever you include the `vector_store_ids` parameter in your request. +## Accessing Search Results (Citations) + +When using vector stores, LiteLLM automatically returns search results in `provider_specific_fields`. This allows you to show users citations for the AI's response. + +### Key Concept + +Search results are always in: `response.choices[0].message.provider_specific_fields["search_results"]` + +For streaming: Results appear in the **final chunk** when `finish_reason == "stop"` + +### Non-Streaming Example + + +**Non-Streaming Response with search results:** + +```json +{ + "id": "chatcmpl-abc123", + "choices": [{ + "index": 0, + "message": { + "role": "assistant", + "content": "LiteLLM is a platform...", + "provider_specific_fields": { + "search_results": [{ + "search_query": "What is litellm?", + "data": [{ + "score": 0.95, + "content": [{"text": "...", "type": "text"}], + "filename": "litellm-docs.md", + "file_id": "doc-123" + }] + }] + } + }, + "finish_reason": "stop" + }] +} +``` + + + + +```python +from openai import OpenAI + +client = OpenAI( + base_url="http://localhost:4000", + api_key="your-litellm-api-key" +) + +response = client.chat.completions.create( + model="claude-3-5-sonnet", + messages=[{"role": "user", "content": "What is litellm?"}], + tools=[{"type": "file_search", "vector_store_ids": ["T37J8R4WTM"]}] +) + +# Get AI response +print(response.choices[0].message.content) + +# Get search results (citations) +search_results = response.choices[0].message.provider_specific_fields.get("search_results", []) + +for result_page in search_results: + for idx, item in enumerate(result_page['data'], 1): + print(f"[{idx}] {item.get('filename', 'Unknown')} (score: {item['score']:.2f})") +``` + + + + + +```typescript +import OpenAI from 'openai'; + +const client = new OpenAI({ + baseURL: 'http://localhost:4000', + apiKey: process.env.LITELLM_API_KEY +}); + +const response = await client.chat.completions.create({ + model: 'claude-3-5-sonnet', + messages: [{ role: 'user', content: 'What is litellm?' 
}], + tools: [{ type: 'file_search', vector_store_ids: ['T37J8R4WTM'] }] +}); + +// Get AI response +console.log(response.choices[0].message.content); + +// Get search results (citations) +const message = response.choices[0].message as any; +const searchResults = message.provider_specific_fields?.search_results || []; + +searchResults.forEach((page: any) => { + page.data.forEach((item: any, idx: number) => { + console.log(`[${idx + 1}] ${item.filename || 'Unknown'} (${item.score.toFixed(2)})`); + }); +}); +``` + + + + +### Streaming Example + +**Streaming Response with search results (final chunk):** + +```json +{ + "id": "chatcmpl-abc123", + "choices": [{ + "index": 0, + "delta": { + "provider_specific_fields": { + "search_results": [{ + "search_query": "What is litellm?", + "data": [{ + "score": 0.95, + "content": [{"text": "...", "type": "text"}], + "filename": "litellm-docs.md", + "file_id": "doc-123" + }] + }] + } + }, + "finish_reason": "stop" + }] +} +``` + + + + +```python +from openai import OpenAI + +client = OpenAI( + base_url="http://localhost:4000", + api_key="your-litellm-api-key" +) + +stream = client.chat.completions.create( + model="claude-3-5-sonnet", + messages=[{"role": "user", "content": "What is litellm?"}], + tools=[{"type": "file_search", "vector_store_ids": ["T37J8R4WTM"]}], + stream=True +) + +for chunk in stream: + # Stream content + if chunk.choices[0].delta.content: + print(chunk.choices[0].delta.content, end="", flush=True) + + # Get citations in final chunk + if chunk.choices[0].finish_reason == "stop": + search_results = getattr(chunk.choices[0].delta, 'provider_specific_fields', {}).get('search_results', []) + if search_results: + print("\n\nSources:") + for page in search_results: + for idx, item in enumerate(page['data'], 1): + print(f" [{idx}] {item.get('filename', 'Unknown')} ({item['score']:.2f})") +``` + + + + + +```typescript +import OpenAI from 'openai'; + +const stream = await client.chat.completions.create({ + model: 'claude-3-5-sonnet', + messages: [{ role: 'user', content: 'What is litellm?' 
}], + tools: [{ type: 'file_search', vector_store_ids: ['T37J8R4WTM'] }], + stream: true +}); + +for await (const chunk of stream) { + // Stream content + if (chunk.choices[0]?.delta?.content) { + process.stdout.write(chunk.choices[0].delta.content); + } + + // Get citations in final chunk + if (chunk.choices[0]?.finish_reason === 'stop') { + const searchResults = (chunk.choices[0].delta as any).provider_specific_fields?.search_results || []; + if (searchResults.length > 0) { + console.log('\n\nSources:'); + searchResults.forEach((page: any) => { + page.data.forEach((item: any, idx: number) => { + console.log(` [${idx + 1}] ${item.filename || 'Unknown'} (${item.score.toFixed(2)})`); + }); + }); + } + } +} +``` + + + + +### Search Result Fields + +| Field | Type | Description | +|-------|------|-------------| +| `search_query` | string | The query used to search the vector store | +| `data` | array | Array of search results | +| `data[].score` | float | Relevance score (0-1, higher is more relevant) | +| `data[].content` | array | Content chunks with `text` and `type` | +| `data[].filename` | string | Name of the source file (optional) | +| `data[].file_id` | string | Identifier for the source file (optional) | +| `data[].attributes` | object | Provider-specific metadata (optional) | + ## API Reference ### LiteLLM Completion Knowledge Base Parameters diff --git a/docs/my-website/docs/providers/bedrock_vector_store.md b/docs/my-website/docs/providers/bedrock_vector_store.md index 779c4fd0417d..39e1aec5ab83 100644 --- a/docs/my-website/docs/providers/bedrock_vector_store.md +++ b/docs/my-website/docs/providers/bedrock_vector_store.md @@ -138,7 +138,14 @@ print(response.choices[0].message.content) -Futher Reading Vector Stores: +## Accessing Search Results + +See how to access vector store search results in your response: +- [Accessing Search Results (Non-Streaming & Streaming)](../completion/knowledgebase#accessing-search-results-citations) + +## Further Reading + +Vector Stores: - [Always on Vector Stores](https://docs.litellm.ai/docs/completion/knowledgebase#always-on-for-a-model) - [Listing available vector stores on litellm proxy](https://docs.litellm.ai/docs/completion/knowledgebase#listing-available-vector-stores) - [How LiteLLM Vector Stores Work](https://docs.litellm.ai/docs/completion/knowledgebase#how-it-works) \ No newline at end of file diff --git a/docs/my-website/docs/proxy/deploy.md b/docs/my-website/docs/proxy/deploy.md index 4d51aa34dc3a..7d2389383d15 100644 --- a/docs/my-website/docs/proxy/deploy.md +++ b/docs/my-website/docs/proxy/deploy.md @@ -788,7 +788,7 @@ docker run --name litellm-proxy \ ## Platform-specific Guide - ### Terraform-based ECS Deployment diff --git a/docs/my-website/release_notes/v1.78.0-stable/index.md b/docs/my-website/release_notes/v1.78.0-stable/index.md index e9a471f45b57..63d5eaca0b02 100644 --- a/docs/my-website/release_notes/v1.78.0-stable/index.md +++ b/docs/my-website/release_notes/v1.78.0-stable/index.md @@ -40,7 +40,7 @@ import TabItem from '@theme/TabItem'; docker run \ -e STORE_MODEL_IN_DB=True \ -p 4000:4000 \ -ghcr.io/berriai/litellm:v1.78.0.rc.1 +ghcr.io/berriai/litellm:v1.78.0.rc.2 ``` @@ -48,7 +48,7 @@ ghcr.io/berriai/litellm:v1.78.0.rc.1 ``` showLineNumbers title="pip install litellm" -pip install litellm==1.78.0.rc.1 +pip install litellm==1.78.0.rc.2 ``` diff --git a/litellm/integrations/custom_logger.py b/litellm/integrations/custom_logger.py index ee7e771faa60..615e64b97f99 100644 --- a/litellm/integrations/custom_logger.py +++ 
b/litellm/integrations/custom_logger.py @@ -204,6 +204,19 @@ async def async_post_call_success_deployment_hook( """ pass + async def async_post_call_streaming_deployment_hook( + self, + request_data: dict, + response_chunk: Any, + call_type: Optional[CallTypes], + ) -> Optional[Any]: + """ + Allow modifying streaming chunks just before they're returned to the user. + + This is called for each streaming chunk in the response. + """ + pass + #### Fallback Events - router/proxy only #### async def log_model_group_rate_limit_error( self, exception: Exception, original_model_group: Optional[str], kwargs: dict diff --git a/litellm/integrations/vector_store_integrations/vector_store_pre_call_hook.py b/litellm/integrations/vector_store_integrations/vector_store_pre_call_hook.py index 8ef160dd7834..b411a217575e 100644 --- a/litellm/integrations/vector_store_integrations/vector_store_pre_call_hook.py +++ b/litellm/integrations/vector_store_integrations/vector_store_pre_call_hook.py @@ -5,7 +5,7 @@ It searches the vector store for relevant context and appends it to the messages. """ -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, cast +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, cast import litellm import litellm.vector_stores @@ -88,6 +88,8 @@ async def async_get_chat_completion_prompt( return model, messages, non_default_params modified_messages: List[AllMessageValues] = messages.copy() + all_search_results: List[VectorStoreSearchResponse] = [] + for vector_store_to_run in vector_stores_to_run: # Get vector store id from the vector store config @@ -104,6 +106,8 @@ async def async_get_chat_completion_prompt( verbose_logger.debug(f"search_response: {search_response}") + # Store search results for later use in citations + all_search_results.append(search_response) # Process search results and append as context modified_messages = self._append_search_results_to_messages( @@ -115,6 +119,10 @@ async def async_get_chat_completion_prompt( num_results = 0 num_results = len(search_response.get("data", []) or []) verbose_logger.debug(f"Vector store search completed. Added context from {num_results} results") + + # Store search results as-is (already in OpenAI-compatible format) + if litellm_logging_obj and all_search_results: + litellm_logging_obj.model_call_details["search_results"] = all_search_results return model, modified_messages, non_default_params @@ -194,3 +202,109 @@ def _append_search_results_to_messages( return modified_messages return messages + + async def async_post_call_success_deployment_hook( + self, + request_data: dict, + response: Any, + call_type: Optional[Any], + ) -> Optional[Any]: + """ + Add search results to the response after successful LLM call. + + This hook adds the vector store search results (already in OpenAI-compatible format) + to the response's provider_specific_fields. 
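+
+        Illustrative access pattern for the modified response (names as used in this hook):
+            response.choices[0].message.provider_specific_fields["search_results"]
+            # -> List[VectorStoreSearchResponse] collected by async_get_chat_completion_prompt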
+ """ + try: + verbose_logger.debug("VectorStorePreCallHook.async_post_call_success_deployment_hook called") + + # Get logging object from request_data + litellm_logging_obj = request_data.get("litellm_logging_obj") + if not litellm_logging_obj: + verbose_logger.debug("No litellm_logging_obj in request_data") + return None + + verbose_logger.debug(f"model_call_details keys: {list(litellm_logging_obj.model_call_details.keys())}") + + # Get search results from model_call_details (already in OpenAI format) + search_results: Optional[List[VectorStoreSearchResponse]] = ( + litellm_logging_obj.model_call_details.get("search_results") + ) + + verbose_logger.debug(f"Search results found: {search_results is not None}") + + if not search_results: + verbose_logger.debug("No search results found") + return None + + # Add search results to response object + if hasattr(response, "choices") and response.choices: + for choice in response.choices: + if hasattr(choice, "message") and choice.message: + # Get existing provider_specific_fields or create new dict + provider_fields = getattr(choice.message, "provider_specific_fields", None) or {} + + # Add search results (already in OpenAI-compatible format) + provider_fields["search_results"] = search_results + + # Set the provider_specific_fields + setattr(choice.message, "provider_specific_fields", provider_fields) + + verbose_logger.debug(f"Added {len(search_results)} search results to response") + + # Return modified response + return response + + except Exception as e: + verbose_logger.exception(f"Error adding search results to response: {str(e)}") + # Don't fail the request if search results fail to be added + return None + + async def async_post_call_streaming_deployment_hook( + self, + request_data: dict, + response_chunk: Any, + call_type: Optional[Any], + ) -> Optional[Any]: + """ + Add search results to the final streaming chunk. + + This hook is called for the final streaming chunk, allowing us to add + search results to the stream before it's returned to the user. 
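+
+        Illustrative access pattern for the final chunk (mirrors the non-streaming hook above):
+            response_chunk.choices[0].delta.provider_specific_fields["search_results"]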
+ """ + try: + verbose_logger.debug("VectorStorePreCallHook.async_post_call_streaming_deployment_hook called") + + # Get search results from model_call_details (already in OpenAI format) + search_results: Optional[List[VectorStoreSearchResponse]] = ( + request_data.get("search_results") + ) + + verbose_logger.debug(f"Search results found for streaming chunk: {search_results is not None}") + + if not search_results: + verbose_logger.debug("No search results found for streaming chunk") + return response_chunk + + # Add search results to streaming chunk + if hasattr(response_chunk, "choices") and response_chunk.choices: + for choice in response_chunk.choices: + if hasattr(choice, "delta") and choice.delta: + # Get existing provider_specific_fields or create new dict + provider_fields = getattr(choice.delta, "provider_specific_fields", None) or {} + + # Add search results (already in OpenAI-compatible format) + provider_fields["search_results"] = search_results + + # Set the provider_specific_fields + choice.delta.provider_specific_fields = provider_fields + + verbose_logger.debug(f"Added {len(search_results)} search results to streaming chunk") + + # Return modified chunk + return response_chunk + + except Exception as e: + verbose_logger.exception(f"Error adding search results to streaming chunk: {str(e)}") + # Don't fail the request if search results fail to be added + return response_chunk diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index eafcab885577..7369cdf92845 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -699,6 +699,9 @@ def get_custom_logger_for_prompt_management( self.model_call_details["prompt_integration"] = ( vector_store_custom_logger.__class__.__name__ ) + # Add to global callbacks so post-call hooks are invoked + if vector_store_custom_logger and vector_store_custom_logger not in litellm.callbacks: + litellm.logging_callback_manager.add_litellm_callback(vector_store_custom_logger) return vector_store_custom_logger return None diff --git a/litellm/litellm_core_utils/streaming_handler.py b/litellm/litellm_core_utils/streaming_handler.py index 1daf543cfcb4..64223a9ba4e8 100644 --- a/litellm/litellm_core_utils/streaming_handler.py +++ b/litellm/litellm_core_utils/streaming_handler.py @@ -20,7 +20,9 @@ from litellm.litellm_core_utils.thread_pool_executor import executor from litellm.types.llms.openai import ChatCompletionChunk from litellm.types.router import GenericLiteLLMParams -from litellm.types.utils import Delta +from litellm.types.utils import ( + Delta, +) from litellm.types.utils import GenericStreamingChunk as GChunk from litellm.types.utils import ( ModelResponse, @@ -1520,6 +1522,43 @@ def set_logging_event_loop(self, loop): """ self.logging_loop = loop + async def _call_post_streaming_deployment_hook(self, chunk): + """ + Call the post-call streaming deployment hook for callbacks. + + This allows callbacks to modify streaming chunks before they're returned. 
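+
+        Sketch of a callback opting in (signature from CustomLogger.async_post_call_streaming_deployment_hook):
+            async def async_post_call_streaming_deployment_hook(self, request_data, response_chunk, call_type):
+                return response_chunk  # return a (possibly modified) chunk; returning None leaves it unchanged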
+ """ + try: + import litellm + from litellm.integrations.custom_logger import CustomLogger + from litellm.types.utils import CallTypes + + # Get request kwargs from logging object + request_data = self.logging_obj.model_call_details + call_type_str = self.logging_obj.call_type + + try: + typed_call_type = CallTypes(call_type_str) + except ValueError: + typed_call_type = None + + # Call hooks for all callbacks + for callback in litellm.callbacks: + if isinstance(callback, CustomLogger) and hasattr(callback, "async_post_call_streaming_deployment_hook"): + result = await callback.async_post_call_streaming_deployment_hook( + request_data=request_data, + response_chunk=chunk, + call_type=typed_call_type, + ) + if result is not None: + chunk = result + + return chunk + except Exception as e: + from litellm._logging import verbose_logger + verbose_logger.exception(f"Error in post-call streaming deployment hook: {str(e)}") + return chunk + def cache_streaming_response(self, processed_chunk, cache_hit: bool): """ Caches the streaming response @@ -1825,6 +1864,11 @@ async def __anext__(self): # noqa: PLR0915 if self.sent_last_chunk is True and self.stream_options is None: usage = calculate_total_usage(chunks=self.chunks) processed_chunk._hidden_params["usage"] = usage + + # Call post-call streaming deployment hook for final chunk + if self.sent_last_chunk is True: + processed_chunk = await self._call_post_streaming_deployment_hook(processed_chunk) + return processed_chunk raise StopAsyncIteration else: # temporary patch for non-aiohttp async calls diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py index 41b36d46c755..d205d73da8eb 100644 --- a/litellm/proxy/_types.py +++ b/litellm/proxy/_types.py @@ -377,6 +377,7 @@ class LiteLLMRoutes(enum.Enum): llm_api_routes = ( openai_routes + anthropic_routes + + google_routes + mapped_pass_through_routes + passthrough_routes_wildcard + apply_guardrail_routes diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 338fa98118c8..04a3e89599e3 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -1,9 +1,8 @@ model_list: - - model_name: db-openai-endpoint + - model_name: anthropic/* litellm_params: - model: openai/gm - api_key: hi - api_base: https://exampleopenaiendpoint-production.up.railway.app/ + model: anthropic/* + litellm_settings: diff --git a/litellm/types/llms/anthropic.py b/litellm/types/llms/anthropic.py index 02c6f2cf8cf8..96c9b6be3ad3 100644 --- a/litellm/types/llms/anthropic.py +++ b/litellm/types/llms/anthropic.py @@ -156,6 +156,36 @@ class CitationsObject(TypedDict): enabled: bool +class AnthropicCitationPageLocation(TypedDict, total=False): + """ + Anthropic citation for page-based references. + Used when citing from documents with page numbers. + """ + type: Literal["page_location"] + cited_text: str # The exact text being cited (not counted towards output tokens) + document_index: int # Index referencing the cited document + document_title: Optional[str] # Title of the cited document + start_page_number: int # 1-indexed starting page + end_page_number: int # Exclusive ending page + + +class AnthropicCitationCharLocation(TypedDict, total=False): + """ + Anthropic citation for character-based references. + Used when citing from text with character positions. 
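+
+    Example payload (illustrative values only):
+        {"type": "char_location", "cited_text": "LiteLLM is a proxy...",
+         "document_index": 0, "document_title": "litellm-docs",
+         "start_char_index": 0, "end_char_index": 25}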
+ """ + type: Literal["char_location"] + cited_text: str # The exact text being cited (not counted towards output tokens) + document_index: int # Index referencing the cited document + document_title: Optional[str] # Title of the cited document + start_char_index: int # Starting character index for the citation + end_char_index: int # Ending character index for the citation + + +# Union type for all citation formats +AnthropicCitation = Union[AnthropicCitationPageLocation, AnthropicCitationCharLocation] + + class AnthropicMessagesDocumentParam(TypedDict, total=False): type: Required[Literal["document"]] source: Required[ diff --git a/litellm/utils.py b/litellm/utils.py index 5861d703a344..68588b3086a1 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -1507,7 +1507,7 @@ async def wrapper_async(*args, **kwargs): # noqa: PLR0915 ) # Only run if call_type is a valid value in CallTypes if call_type in [ct.value for ct in CallTypes]: - await async_post_call_success_deployment_hook( + result = await async_post_call_success_deployment_hook( request_data=kwargs, response=result, call_type=CallTypes(call_type), diff --git a/tests/logging_callback_tests/test_bedrock_knowledgebase_hook.py b/tests/logging_callback_tests/test_bedrock_knowledgebase_hook.py index 60eb53ed6af5..5b8a99bff7f8 100644 --- a/tests/logging_callback_tests/test_bedrock_knowledgebase_hook.py +++ b/tests/logging_callback_tests/test_bedrock_knowledgebase_hook.py @@ -118,7 +118,7 @@ async def test_e2e_bedrock_knowledgebase_retrieval_with_completion(setup_vector_ @pytest.mark.asyncio async def test_e2e_bedrock_knowledgebase_retrieval_with_llm_api_call(setup_vector_store_registry): """ - Test that the Bedrock Knowledge Base Hook works when making a real llm api call + Test that the Bedrock Knowledge Base Hook works when making a real llm api call and returns citations. """ # Init client @@ -132,9 +132,85 @@ async def test_e2e_bedrock_knowledgebase_retrieval_with_llm_api_call(setup_vecto ], client=async_client ) + print("OPENAI RESPONSE:", json.dumps(dict(response), indent=4, default=str)) assert response is not None + + # Check that search_results are present in provider_specific_fields + assert hasattr(response.choices[0].message, "provider_specific_fields") + provider_fields = response.choices[0].message.provider_specific_fields + assert provider_fields is not None + assert "search_results" in provider_fields + search_results = provider_fields["search_results"] + assert search_results is not None + assert len(search_results) > 0 + + # Check search result structure (OpenAI-compatible format) + first_search_result = search_results[0] + assert "object" in first_search_result + assert first_search_result["object"] == "vector_store.search_results.page" + assert "data" in first_search_result + assert len(first_search_result["data"]) > 0 + + # Check individual result structure + first_result = first_search_result["data"][0] + assert "score" in first_result + assert "content" in first_result + print(f"Search results returned: {len(search_results)}") + print(f"First search result has {len(first_search_result['data'])} items") + + +@pytest.mark.asyncio +async def test_e2e_bedrock_knowledgebase_retrieval_with_llm_api_call_streaming(setup_vector_store_registry): + """ + Test that the Bedrock Knowledge Base Hook works with streaming and returns search_results in chunks. 
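+
+    The citations are expected on the final chunk, i.e.
+        chunk.choices[0].delta.provider_specific_fields["search_results"]
+    in the same vector_store.search_results.page format asserted in the non-streaming test above.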
+ """ + + # Init client + # litellm._turn_on_debug() + async_client = AsyncHTTPHandler() + response = await litellm.acompletion( + model="anthropic/claude-3-5-haiku-latest", + messages=[{"role": "user", "content": "what is litellm?"}], + vector_store_ids = [ + "T37J8R4WTM" + ], + stream=True, + client=async_client + ) + + # Collect chunks + chunks = [] + search_results_found = False + async for chunk in response: + chunks.append(chunk) + print(f"Chunk: {chunk}") + + # Check if this chunk has search_results in provider_specific_fields + if hasattr(chunk, "choices") and chunk.choices: + for choice in chunk.choices: + if hasattr(choice, "delta") and choice.delta: + provider_fields = getattr(choice.delta, "provider_specific_fields", None) + if provider_fields and "search_results" in provider_fields: + search_results = provider_fields["search_results"] + print(f"Found search_results in streaming chunk: {len(search_results)} results") + + # Verify structure + assert search_results is not None + assert len(search_results) > 0 + + first_search_result = search_results[0] + assert "object" in first_search_result + assert first_search_result["object"] == "vector_store.search_results.page" + assert "data" in first_search_result + assert len(first_search_result["data"]) > 0 + + search_results_found = True + + print(f"Total chunks received: {len(chunks)}") + assert len(chunks) > 0 + assert search_results_found, "search_results should be present in streaming chunks" @pytest.mark.asyncio diff --git a/tests/test_litellm/proxy/auth/test_route_checks.py b/tests/test_litellm/proxy/auth/test_route_checks.py index 0c72c8bb1630..7d00b812a5cc 100644 --- a/tests/test_litellm/proxy/auth/test_route_checks.py +++ b/tests/test_litellm/proxy/auth/test_route_checks.py @@ -156,6 +156,31 @@ def test_virtual_key_llm_api_route_includes_passthrough_prefix(route): assert result is True +@pytest.mark.parametrize( + "route", + [ + "/v1beta/models/gemini-2.5-flash:countTokens", + "/v1beta/models/gemini-2.0-flash:generateContent", + "/v1beta/models/gemini-1.5-pro:streamGenerateContent", + "/models/gemini-2.5-flash:countTokens", + "/models/gemini-2.0-flash:generateContent", + "/models/gemini-1.5-pro:streamGenerateContent", + ], +) +def test_virtual_key_llm_api_routes_allows_google_routes(route): + """ + Test that virtual keys with llm_api_routes permission can access Google AI Studio routes. 
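+
+    These routes are permitted because google_routes is now part of
+    LiteLLMRoutes.llm_api_routes (litellm/proxy/_types.py).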
+ """ + + valid_token = UserAPIKeyAuth(user_id="test_user", allowed_routes=["llm_api_routes"]) + + result = RouteChecks.is_virtual_key_allowed_to_call_route( + route=route, valid_token=valid_token + ) + + assert result is True + + def test_virtual_key_allowed_routes_with_multiple_litellm_routes_member_names(): """Test that virtual key works with multiple LiteLLMRoutes member names in allowed_routes""" diff --git a/ui/litellm-dashboard/src/components/chat_ui/ChatUI.tsx b/ui/litellm-dashboard/src/components/chat_ui/ChatUI.tsx index ab288c369838..680f66702bc3 100644 --- a/ui/litellm-dashboard/src/components/chat_ui/ChatUI.tsx +++ b/ui/litellm-dashboard/src/components/chat_ui/ChatUI.tsx @@ -37,6 +37,7 @@ import ChatImageRenderer from "./ChatImageRenderer"; import { createChatMultimodalMessage, createChatDisplayMessage } from "./ChatImageUtils"; import SessionManagement from "./SessionManagement"; import MCPEventsDisplay, { MCPEvent } from "./MCPEventsDisplay"; +import { SearchResultsDisplay } from "./SearchResultsDisplay"; import { ApiOutlined, KeyOutlined, @@ -436,6 +437,25 @@ const ChatUI: React.FC = ({ accessToken, token, userRole, userID, d }); }; + const updateSearchResults = (searchResults: any[]) => { + console.log("Received search results:", searchResults); + setChatHistory((prevHistory) => { + const lastMessage = prevHistory[prevHistory.length - 1]; + + if (lastMessage && lastMessage.role === "assistant") { + console.log("Updating message with search results"); + const updatedMessage = { + ...lastMessage, + searchResults, + }; + + return [...prevHistory.slice(0, prevHistory.length - 1), updatedMessage]; + } + + return prevHistory; + }); + }; + const handleResponseId = (responseId: string) => { console.log("Received response ID for session management:", responseId); if (useApiSessionManagement) { @@ -687,6 +707,7 @@ const ChatUI: React.FC = ({ accessToken, token, userRole, userID, d selectedGuardrails.length > 0 ? selectedGuardrails : undefined, selectedMCPTools, // Pass the selected tools array updateChatImageUI, // Pass the image callback + updateSearchResults, // Pass the search results callback ); } else if (endpointType === EndpointType.IMAGE) { // For image generation @@ -1101,6 +1122,11 @@ const ChatUI: React.FC = ({ accessToken, token, userRole, userID, d )} + {/* Show search results at the start of assistant messages */} + {message.role === "assistant" && message.searchResults && ( + + )} +
diff --git a/ui/litellm-dashboard/src/components/chat_ui/SearchResultsDisplay.tsx b/ui/litellm-dashboard/src/components/chat_ui/SearchResultsDisplay.tsx
new file mode 100644
--- /dev/null
+++ b/ui/litellm-dashboard/src/components/chat_ui/SearchResultsDisplay.tsx
@@ -0,0 +1,90 @@
+import React, { useState } from "react";
+import { VectorStoreSearchResponse } from "./types";
+
+interface SearchResultsDisplayProps {
+  searchResults: VectorStoreSearchResponse[];
+}
+
+export function SearchResultsDisplay({ searchResults }: SearchResultsDisplayProps) {
+  const [isExpanded, setIsExpanded] = useState(false);
+  const [expandedResults, setExpandedResults] = useState<Record<string, boolean>>({});
+
+  if (!searchResults || searchResults.length === 0) {
+    return null;
+  }
+
+  const toggleResult = (pageIndex: number, resultIndex: number) => {
+    const key = `${pageIndex}-${resultIndex}`;
+    setExpandedResults((prev) => ({
+      ...prev,
+      [key]: !prev[key],
+    }));
+  };
+
+  const totalResults = searchResults.reduce((sum, page) => sum + page.data.length, 0);
+
+  return (
+    <div>
+      {/* Collapsible header showing the total number of vector store results */}
+      <button onClick={() => setIsExpanded(!isExpanded)}>
+        Sources ({totalResults})
+      </button>
+
+      {isExpanded && (
+        <div>
+          {searchResults.map((resultPage, pageIndex) => (
+            <div key={pageIndex}>
+              <div>
+                <span>Query:</span>
+                <span>"{resultPage.search_query}"</span>
+                <span>
+                  {resultPage.data.length} result{resultPage.data.length !== 1 ? 's' : ''}
+                </span>
+              </div>
+
+              <div>
+                {resultPage.data.map((result, resultIndex) => {
+                  const isResultExpanded = expandedResults[`${pageIndex}-${resultIndex}`] || false;
+
+                  return (
+                    <div key={resultIndex}>
+                      <div onClick={() => toggleResult(pageIndex, resultIndex)}>
+                        <span>
+                          {result.filename || result.file_id || `Result ${resultIndex + 1}`}
+                        </span>
+                        <span>{result.score.toFixed(3)}</span>
+                      </div>
+
+                      {isResultExpanded && (
+                        <div>
+                          {result.content.map((content, contentIndex) => (
+                            <div key={contentIndex}>{content.text}</div>
+                          ))}
+
+                          {result.attributes && Object.keys(result.attributes).length > 0 && (
+                            <div>
+                              <div>Metadata:</div>
+                              <div>
+                                {Object.entries(result.attributes).map(([key, value]) => (
+                                  <div key={key}>
+                                    <span>{key}:</span>
+                                    <span>{String(value)}</span>
+                                  </div>
+                                ))}
+                              </div>
+                            </div>
+                          )}
+                        </div>
+                      )}
+                    </div>
+                  );
+                })}
+              </div>
+            </div>
+          ))}
+        </div>
+      )}
+    </div>
+ ); +} + diff --git a/ui/litellm-dashboard/src/components/chat_ui/llm_calls/chat_completion.tsx b/ui/litellm-dashboard/src/components/chat_ui/llm_calls/chat_completion.tsx index 44aea48986bb..7baad24e48c4 100644 --- a/ui/litellm-dashboard/src/components/chat_ui/llm_calls/chat_completion.tsx +++ b/ui/litellm-dashboard/src/components/chat_ui/llm_calls/chat_completion.tsx @@ -1,6 +1,7 @@ import openai from "openai"; import { ChatCompletionMessageParam } from "openai/resources/chat/completions"; import { TokenUsage } from "../ResponseMetrics"; +import { VectorStoreSearchResponse } from "../types"; import { getProxyBaseUrl } from "@/components/networking"; export async function makeOpenAIChatCompletionRequest( @@ -18,6 +19,7 @@ export async function makeOpenAIChatCompletionRequest( guardrails?: string[], selectedMCPTools?: string[], onImageGenerated?: (imageUrl: string, model?: string) => void, + onSearchResults?: (searchResults: VectorStoreSearchResponse[]) => void, ) { // base url should be the current base_url const isLocal = process.env.NODE_ENV === "development"; @@ -127,6 +129,12 @@ export async function makeOpenAIChatCompletionRequest( fullReasoningContent += reasoningContent; } + // Check for search results in provider_specific_fields + if (delta && delta.provider_specific_fields?.search_results && onSearchResults) { + console.log("Search results found:", delta.provider_specific_fields.search_results); + onSearchResults(delta.provider_specific_fields.search_results); + } + // Check for usage data using type assertion const chunkWithUsage = chunk as any; if (chunkWithUsage.usage && onUsageData) { diff --git a/ui/litellm-dashboard/src/components/chat_ui/types.ts b/ui/litellm-dashboard/src/components/chat_ui/types.ts index 454256e56cb1..e8a98473cf40 100644 --- a/ui/litellm-dashboard/src/components/chat_ui/types.ts +++ b/ui/litellm-dashboard/src/components/chat_ui/types.ts @@ -56,6 +56,20 @@ export interface StreamingResponse { usage?: Usage; } +export interface VectorStoreSearchResult { + score: number; + content: Array<{ text: string; type: string }>; + file_id?: string; + filename?: string; + attributes?: Record; +} + +export interface VectorStoreSearchResponse { + object: string; + search_query: string; + data: VectorStoreSearchResult[]; +} + export interface MessageType { role: string; content: string | MultimodalContent[]; @@ -75,6 +89,7 @@ export interface MessageType { url: string; detail: string; }; + searchResults?: VectorStoreSearchResponse[]; } export interface MultimodalContent {