diff --git a/docs/my-website/docs/completion/knowledgebase.md b/docs/my-website/docs/completion/knowledgebase.md index ee0e30867853..e772f4fe9556 100644 --- a/docs/my-website/docs/completion/knowledgebase.md +++ b/docs/my-website/docs/completion/knowledgebase.md @@ -412,6 +412,219 @@ This is sent to: `https://bedrock-agent-runtime.{aws_region}.amazonaws.com/knowl This process happens automatically whenever you include the `vector_store_ids` parameter in your request. +## Accessing Search Results (Citations) + +When using vector stores, LiteLLM automatically returns search results in `provider_specific_fields`. This allows you to show users citations for the AI's response. + +### Key Concept + +Search results are always in: `response.choices[0].message.provider_specific_fields["search_results"]` + +For streaming: Results appear in the **final chunk** when `finish_reason == "stop"` + +### Non-Streaming Example + + +**Non-Streaming Response with search results:** + +```json +{ + "id": "chatcmpl-abc123", + "choices": [{ + "index": 0, + "message": { + "role": "assistant", + "content": "LiteLLM is a platform...", + "provider_specific_fields": { + "search_results": [{ + "search_query": "What is litellm?", + "data": [{ + "score": 0.95, + "content": [{"text": "...", "type": "text"}], + "filename": "litellm-docs.md", + "file_id": "doc-123" + }] + }] + } + }, + "finish_reason": "stop" + }] +} +``` + + + + +```python +from openai import OpenAI + +client = OpenAI( + base_url="http://localhost:4000", + api_key="your-litellm-api-key" +) + +response = client.chat.completions.create( + model="claude-3-5-sonnet", + messages=[{"role": "user", "content": "What is litellm?"}], + tools=[{"type": "file_search", "vector_store_ids": ["T37J8R4WTM"]}] +) + +# Get AI response +print(response.choices[0].message.content) + +# Get search results (citations) +search_results = response.choices[0].message.provider_specific_fields.get("search_results", []) + +for result_page in search_results: + for idx, item in enumerate(result_page['data'], 1): + print(f"[{idx}] {item.get('filename', 'Unknown')} (score: {item['score']:.2f})") +``` + + + + + +```typescript +import OpenAI from 'openai'; + +const client = new OpenAI({ + baseURL: 'http://localhost:4000', + apiKey: process.env.LITELLM_API_KEY +}); + +const response = await client.chat.completions.create({ + model: 'claude-3-5-sonnet', + messages: [{ role: 'user', content: 'What is litellm?' 
}], + tools: [{ type: 'file_search', vector_store_ids: ['T37J8R4WTM'] }] +}); + +// Get AI response +console.log(response.choices[0].message.content); + +// Get search results (citations) +const message = response.choices[0].message as any; +const searchResults = message.provider_specific_fields?.search_results || []; + +searchResults.forEach((page: any) => { + page.data.forEach((item: any, idx: number) => { + console.log(`[${idx + 1}] ${item.filename || 'Unknown'} (${item.score.toFixed(2)})`); + }); +}); +``` + + + + +### Streaming Example + +**Streaming Response with search results (final chunk):** + +```json +{ + "id": "chatcmpl-abc123", + "choices": [{ + "index": 0, + "delta": { + "provider_specific_fields": { + "search_results": [{ + "search_query": "What is litellm?", + "data": [{ + "score": 0.95, + "content": [{"text": "...", "type": "text"}], + "filename": "litellm-docs.md", + "file_id": "doc-123" + }] + }] + } + }, + "finish_reason": "stop" + }] +} +``` + + + + +```python +from openai import OpenAI + +client = OpenAI( + base_url="http://localhost:4000", + api_key="your-litellm-api-key" +) + +stream = client.chat.completions.create( + model="claude-3-5-sonnet", + messages=[{"role": "user", "content": "What is litellm?"}], + tools=[{"type": "file_search", "vector_store_ids": ["T37J8R4WTM"]}], + stream=True +) + +for chunk in stream: + # Stream content + if chunk.choices[0].delta.content: + print(chunk.choices[0].delta.content, end="", flush=True) + + # Get citations in final chunk + if chunk.choices[0].finish_reason == "stop": + search_results = getattr(chunk.choices[0].delta, 'provider_specific_fields', {}).get('search_results', []) + if search_results: + print("\n\nSources:") + for page in search_results: + for idx, item in enumerate(page['data'], 1): + print(f" [{idx}] {item.get('filename', 'Unknown')} ({item['score']:.2f})") +``` + + + + + +```typescript +import OpenAI from 'openai'; + +const stream = await client.chat.completions.create({ + model: 'claude-3-5-sonnet', + messages: [{ role: 'user', content: 'What is litellm?' 
}], + tools: [{ type: 'file_search', vector_store_ids: ['T37J8R4WTM'] }], + stream: true +}); + +for await (const chunk of stream) { + // Stream content + if (chunk.choices[0]?.delta?.content) { + process.stdout.write(chunk.choices[0].delta.content); + } + + // Get citations in final chunk + if (chunk.choices[0]?.finish_reason === 'stop') { + const searchResults = (chunk.choices[0].delta as any).provider_specific_fields?.search_results || []; + if (searchResults.length > 0) { + console.log('\n\nSources:'); + searchResults.forEach((page: any) => { + page.data.forEach((item: any, idx: number) => { + console.log(` [${idx + 1}] ${item.filename || 'Unknown'} (${item.score.toFixed(2)})`); + }); + }); + } + } +} +``` + + + + +### Search Result Fields + +| Field | Type | Description | +|-------|------|-------------| +| `search_query` | string | The query used to search the vector store | +| `data` | array | Array of search results | +| `data[].score` | float | Relevance score (0-1, higher is more relevant) | +| `data[].content` | array | Content chunks with `text` and `type` | +| `data[].filename` | string | Name of the source file (optional) | +| `data[].file_id` | string | Identifier for the source file (optional) | +| `data[].attributes` | object | Provider-specific metadata (optional) | + ## API Reference ### LiteLLM Completion Knowledge Base Parameters diff --git a/docs/my-website/docs/providers/bedrock_vector_store.md b/docs/my-website/docs/providers/bedrock_vector_store.md index 779c4fd0417d..39e1aec5ab83 100644 --- a/docs/my-website/docs/providers/bedrock_vector_store.md +++ b/docs/my-website/docs/providers/bedrock_vector_store.md @@ -138,7 +138,14 @@ print(response.choices[0].message.content) -Futher Reading Vector Stores: +## Accessing Search Results + +See how to access vector store search results in your response: +- [Accessing Search Results (Non-Streaming & Streaming)](../completion/knowledgebase#accessing-search-results-citations) + +## Further Reading + +Vector Stores: - [Always on Vector Stores](https://docs.litellm.ai/docs/completion/knowledgebase#always-on-for-a-model) - [Listing available vector stores on litellm proxy](https://docs.litellm.ai/docs/completion/knowledgebase#listing-available-vector-stores) - [How LiteLLM Vector Stores Work](https://docs.litellm.ai/docs/completion/knowledgebase#how-it-works) \ No newline at end of file diff --git a/docs/my-website/docs/proxy/deploy.md b/docs/my-website/docs/proxy/deploy.md index 4d51aa34dc3a..7d2389383d15 100644 --- a/docs/my-website/docs/proxy/deploy.md +++ b/docs/my-website/docs/proxy/deploy.md @@ -788,7 +788,7 @@ docker run --name litellm-proxy \ ## Platform-specific Guide - ### Terraform-based ECS Deployment diff --git a/docs/my-website/release_notes/v1.78.0-stable/index.md b/docs/my-website/release_notes/v1.78.0-stable/index.md index e9a471f45b57..63d5eaca0b02 100644 --- a/docs/my-website/release_notes/v1.78.0-stable/index.md +++ b/docs/my-website/release_notes/v1.78.0-stable/index.md @@ -40,7 +40,7 @@ import TabItem from '@theme/TabItem'; docker run \ -e STORE_MODEL_IN_DB=True \ -p 4000:4000 \ -ghcr.io/berriai/litellm:v1.78.0.rc.1 +ghcr.io/berriai/litellm:v1.78.0.rc.2 ``` @@ -48,7 +48,7 @@ ghcr.io/berriai/litellm:v1.78.0.rc.1 ``` showLineNumbers title="pip install litellm" -pip install litellm==1.78.0.rc.1 +pip install litellm==1.78.0.rc.2 ``` diff --git a/litellm/integrations/custom_logger.py b/litellm/integrations/custom_logger.py index ee7e771faa60..615e64b97f99 100644 --- a/litellm/integrations/custom_logger.py +++ 
b/litellm/integrations/custom_logger.py @@ -204,6 +204,19 @@ async def async_post_call_success_deployment_hook( """ pass + async def async_post_call_streaming_deployment_hook( + self, + request_data: dict, + response_chunk: Any, + call_type: Optional[CallTypes], + ) -> Optional[Any]: + """ + Allow modifying streaming chunks just before they're returned to the user. + + This is called for each streaming chunk in the response. + """ + pass + #### Fallback Events - router/proxy only #### async def log_model_group_rate_limit_error( self, exception: Exception, original_model_group: Optional[str], kwargs: dict diff --git a/litellm/integrations/vector_store_integrations/vector_store_pre_call_hook.py b/litellm/integrations/vector_store_integrations/vector_store_pre_call_hook.py index 8ef160dd7834..b411a217575e 100644 --- a/litellm/integrations/vector_store_integrations/vector_store_pre_call_hook.py +++ b/litellm/integrations/vector_store_integrations/vector_store_pre_call_hook.py @@ -5,7 +5,7 @@ It searches the vector store for relevant context and appends it to the messages. """ -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, cast +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, cast import litellm import litellm.vector_stores @@ -88,6 +88,8 @@ async def async_get_chat_completion_prompt( return model, messages, non_default_params modified_messages: List[AllMessageValues] = messages.copy() + all_search_results: List[VectorStoreSearchResponse] = [] + for vector_store_to_run in vector_stores_to_run: # Get vector store id from the vector store config @@ -104,6 +106,8 @@ async def async_get_chat_completion_prompt( verbose_logger.debug(f"search_response: {search_response}") + # Store search results for later use in citations + all_search_results.append(search_response) # Process search results and append as context modified_messages = self._append_search_results_to_messages( @@ -115,6 +119,10 @@ async def async_get_chat_completion_prompt( num_results = 0 num_results = len(search_response.get("data", []) or []) verbose_logger.debug(f"Vector store search completed. Added context from {num_results} results") + + # Store search results as-is (already in OpenAI-compatible format) + if litellm_logging_obj and all_search_results: + litellm_logging_obj.model_call_details["search_results"] = all_search_results return model, modified_messages, non_default_params @@ -194,3 +202,109 @@ def _append_search_results_to_messages( return modified_messages return messages + + async def async_post_call_success_deployment_hook( + self, + request_data: dict, + response: Any, + call_type: Optional[Any], + ) -> Optional[Any]: + """ + Add search results to the response after successful LLM call. + + This hook adds the vector store search results (already in OpenAI-compatible format) + to the response's provider_specific_fields. 
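+
+        Illustrative access pattern for the modified response (names as used in this hook):
+            response.choices[0].message.provider_specific_fields["search_results"]
+            # -> List[VectorStoreSearchResponse] collected by async_get_chat_completion_prompt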
+ """ + try: + verbose_logger.debug("VectorStorePreCallHook.async_post_call_success_deployment_hook called") + + # Get logging object from request_data + litellm_logging_obj = request_data.get("litellm_logging_obj") + if not litellm_logging_obj: + verbose_logger.debug("No litellm_logging_obj in request_data") + return None + + verbose_logger.debug(f"model_call_details keys: {list(litellm_logging_obj.model_call_details.keys())}") + + # Get search results from model_call_details (already in OpenAI format) + search_results: Optional[List[VectorStoreSearchResponse]] = ( + litellm_logging_obj.model_call_details.get("search_results") + ) + + verbose_logger.debug(f"Search results found: {search_results is not None}") + + if not search_results: + verbose_logger.debug("No search results found") + return None + + # Add search results to response object + if hasattr(response, "choices") and response.choices: + for choice in response.choices: + if hasattr(choice, "message") and choice.message: + # Get existing provider_specific_fields or create new dict + provider_fields = getattr(choice.message, "provider_specific_fields", None) or {} + + # Add search results (already in OpenAI-compatible format) + provider_fields["search_results"] = search_results + + # Set the provider_specific_fields + setattr(choice.message, "provider_specific_fields", provider_fields) + + verbose_logger.debug(f"Added {len(search_results)} search results to response") + + # Return modified response + return response + + except Exception as e: + verbose_logger.exception(f"Error adding search results to response: {str(e)}") + # Don't fail the request if search results fail to be added + return None + + async def async_post_call_streaming_deployment_hook( + self, + request_data: dict, + response_chunk: Any, + call_type: Optional[Any], + ) -> Optional[Any]: + """ + Add search results to the final streaming chunk. + + This hook is called for the final streaming chunk, allowing us to add + search results to the stream before it's returned to the user. 
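+
+        Illustrative access pattern for the final chunk (mirrors the non-streaming hook above):
+            response_chunk.choices[0].delta.provider_specific_fields["search_results"]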
+ """ + try: + verbose_logger.debug("VectorStorePreCallHook.async_post_call_streaming_deployment_hook called") + + # Get search results from model_call_details (already in OpenAI format) + search_results: Optional[List[VectorStoreSearchResponse]] = ( + request_data.get("search_results") + ) + + verbose_logger.debug(f"Search results found for streaming chunk: {search_results is not None}") + + if not search_results: + verbose_logger.debug("No search results found for streaming chunk") + return response_chunk + + # Add search results to streaming chunk + if hasattr(response_chunk, "choices") and response_chunk.choices: + for choice in response_chunk.choices: + if hasattr(choice, "delta") and choice.delta: + # Get existing provider_specific_fields or create new dict + provider_fields = getattr(choice.delta, "provider_specific_fields", None) or {} + + # Add search results (already in OpenAI-compatible format) + provider_fields["search_results"] = search_results + + # Set the provider_specific_fields + choice.delta.provider_specific_fields = provider_fields + + verbose_logger.debug(f"Added {len(search_results)} search results to streaming chunk") + + # Return modified chunk + return response_chunk + + except Exception as e: + verbose_logger.exception(f"Error adding search results to streaming chunk: {str(e)}") + # Don't fail the request if search results fail to be added + return response_chunk diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index eafcab885577..7369cdf92845 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -699,6 +699,9 @@ def get_custom_logger_for_prompt_management( self.model_call_details["prompt_integration"] = ( vector_store_custom_logger.__class__.__name__ ) + # Add to global callbacks so post-call hooks are invoked + if vector_store_custom_logger and vector_store_custom_logger not in litellm.callbacks: + litellm.logging_callback_manager.add_litellm_callback(vector_store_custom_logger) return vector_store_custom_logger return None diff --git a/litellm/litellm_core_utils/streaming_handler.py b/litellm/litellm_core_utils/streaming_handler.py index 1daf543cfcb4..64223a9ba4e8 100644 --- a/litellm/litellm_core_utils/streaming_handler.py +++ b/litellm/litellm_core_utils/streaming_handler.py @@ -20,7 +20,9 @@ from litellm.litellm_core_utils.thread_pool_executor import executor from litellm.types.llms.openai import ChatCompletionChunk from litellm.types.router import GenericLiteLLMParams -from litellm.types.utils import Delta +from litellm.types.utils import ( + Delta, +) from litellm.types.utils import GenericStreamingChunk as GChunk from litellm.types.utils import ( ModelResponse, @@ -1520,6 +1522,43 @@ def set_logging_event_loop(self, loop): """ self.logging_loop = loop + async def _call_post_streaming_deployment_hook(self, chunk): + """ + Call the post-call streaming deployment hook for callbacks. + + This allows callbacks to modify streaming chunks before they're returned. 
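+
+        Sketch of a callback opting in (signature from CustomLogger.async_post_call_streaming_deployment_hook):
+            async def async_post_call_streaming_deployment_hook(self, request_data, response_chunk, call_type):
+                return response_chunk  # return a (possibly modified) chunk; returning None leaves it unchanged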
+ """ + try: + import litellm + from litellm.integrations.custom_logger import CustomLogger + from litellm.types.utils import CallTypes + + # Get request kwargs from logging object + request_data = self.logging_obj.model_call_details + call_type_str = self.logging_obj.call_type + + try: + typed_call_type = CallTypes(call_type_str) + except ValueError: + typed_call_type = None + + # Call hooks for all callbacks + for callback in litellm.callbacks: + if isinstance(callback, CustomLogger) and hasattr(callback, "async_post_call_streaming_deployment_hook"): + result = await callback.async_post_call_streaming_deployment_hook( + request_data=request_data, + response_chunk=chunk, + call_type=typed_call_type, + ) + if result is not None: + chunk = result + + return chunk + except Exception as e: + from litellm._logging import verbose_logger + verbose_logger.exception(f"Error in post-call streaming deployment hook: {str(e)}") + return chunk + def cache_streaming_response(self, processed_chunk, cache_hit: bool): """ Caches the streaming response @@ -1825,6 +1864,11 @@ async def __anext__(self): # noqa: PLR0915 if self.sent_last_chunk is True and self.stream_options is None: usage = calculate_total_usage(chunks=self.chunks) processed_chunk._hidden_params["usage"] = usage + + # Call post-call streaming deployment hook for final chunk + if self.sent_last_chunk is True: + processed_chunk = await self._call_post_streaming_deployment_hook(processed_chunk) + return processed_chunk raise StopAsyncIteration else: # temporary patch for non-aiohttp async calls diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py index 41b36d46c755..d205d73da8eb 100644 --- a/litellm/proxy/_types.py +++ b/litellm/proxy/_types.py @@ -377,6 +377,7 @@ class LiteLLMRoutes(enum.Enum): llm_api_routes = ( openai_routes + anthropic_routes + + google_routes + mapped_pass_through_routes + passthrough_routes_wildcard + apply_guardrail_routes diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 338fa98118c8..04a3e89599e3 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -1,9 +1,8 @@ model_list: - - model_name: db-openai-endpoint + - model_name: anthropic/* litellm_params: - model: openai/gm - api_key: hi - api_base: https://exampleopenaiendpoint-production.up.railway.app/ + model: anthropic/* + litellm_settings: diff --git a/litellm/types/llms/anthropic.py b/litellm/types/llms/anthropic.py index 02c6f2cf8cf8..96c9b6be3ad3 100644 --- a/litellm/types/llms/anthropic.py +++ b/litellm/types/llms/anthropic.py @@ -156,6 +156,36 @@ class CitationsObject(TypedDict): enabled: bool +class AnthropicCitationPageLocation(TypedDict, total=False): + """ + Anthropic citation for page-based references. + Used when citing from documents with page numbers. + """ + type: Literal["page_location"] + cited_text: str # The exact text being cited (not counted towards output tokens) + document_index: int # Index referencing the cited document + document_title: Optional[str] # Title of the cited document + start_page_number: int # 1-indexed starting page + end_page_number: int # Exclusive ending page + + +class AnthropicCitationCharLocation(TypedDict, total=False): + """ + Anthropic citation for character-based references. + Used when citing from text with character positions. 
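+
+    Example payload (illustrative values only):
+        {"type": "char_location", "cited_text": "LiteLLM is a proxy...",
+         "document_index": 0, "document_title": "litellm-docs",
+         "start_char_index": 0, "end_char_index": 25}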
+ """ + type: Literal["char_location"] + cited_text: str # The exact text being cited (not counted towards output tokens) + document_index: int # Index referencing the cited document + document_title: Optional[str] # Title of the cited document + start_char_index: int # Starting character index for the citation + end_char_index: int # Ending character index for the citation + + +# Union type for all citation formats +AnthropicCitation = Union[AnthropicCitationPageLocation, AnthropicCitationCharLocation] + + class AnthropicMessagesDocumentParam(TypedDict, total=False): type: Required[Literal["document"]] source: Required[ diff --git a/litellm/utils.py b/litellm/utils.py index 5861d703a344..68588b3086a1 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -1507,7 +1507,7 @@ async def wrapper_async(*args, **kwargs): # noqa: PLR0915 ) # Only run if call_type is a valid value in CallTypes if call_type in [ct.value for ct in CallTypes]: - await async_post_call_success_deployment_hook( + result = await async_post_call_success_deployment_hook( request_data=kwargs, response=result, call_type=CallTypes(call_type), diff --git a/tests/logging_callback_tests/test_bedrock_knowledgebase_hook.py b/tests/logging_callback_tests/test_bedrock_knowledgebase_hook.py index 60eb53ed6af5..5b8a99bff7f8 100644 --- a/tests/logging_callback_tests/test_bedrock_knowledgebase_hook.py +++ b/tests/logging_callback_tests/test_bedrock_knowledgebase_hook.py @@ -118,7 +118,7 @@ async def test_e2e_bedrock_knowledgebase_retrieval_with_completion(setup_vector_ @pytest.mark.asyncio async def test_e2e_bedrock_knowledgebase_retrieval_with_llm_api_call(setup_vector_store_registry): """ - Test that the Bedrock Knowledge Base Hook works when making a real llm api call + Test that the Bedrock Knowledge Base Hook works when making a real llm api call and returns citations. """ # Init client @@ -132,9 +132,85 @@ async def test_e2e_bedrock_knowledgebase_retrieval_with_llm_api_call(setup_vecto ], client=async_client ) + print("OPENAI RESPONSE:", json.dumps(dict(response), indent=4, default=str)) assert response is not None + + # Check that search_results are present in provider_specific_fields + assert hasattr(response.choices[0].message, "provider_specific_fields") + provider_fields = response.choices[0].message.provider_specific_fields + assert provider_fields is not None + assert "search_results" in provider_fields + search_results = provider_fields["search_results"] + assert search_results is not None + assert len(search_results) > 0 + + # Check search result structure (OpenAI-compatible format) + first_search_result = search_results[0] + assert "object" in first_search_result + assert first_search_result["object"] == "vector_store.search_results.page" + assert "data" in first_search_result + assert len(first_search_result["data"]) > 0 + + # Check individual result structure + first_result = first_search_result["data"][0] + assert "score" in first_result + assert "content" in first_result + print(f"Search results returned: {len(search_results)}") + print(f"First search result has {len(first_search_result['data'])} items") + + +@pytest.mark.asyncio +async def test_e2e_bedrock_knowledgebase_retrieval_with_llm_api_call_streaming(setup_vector_store_registry): + """ + Test that the Bedrock Knowledge Base Hook works with streaming and returns search_results in chunks. 
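+
+    The citations are expected on the final chunk, i.e.
+        chunk.choices[0].delta.provider_specific_fields["search_results"]
+    in the same vector_store.search_results.page format asserted in the non-streaming test above.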
+ """ + + # Init client + # litellm._turn_on_debug() + async_client = AsyncHTTPHandler() + response = await litellm.acompletion( + model="anthropic/claude-3-5-haiku-latest", + messages=[{"role": "user", "content": "what is litellm?"}], + vector_store_ids = [ + "T37J8R4WTM" + ], + stream=True, + client=async_client + ) + + # Collect chunks + chunks = [] + search_results_found = False + async for chunk in response: + chunks.append(chunk) + print(f"Chunk: {chunk}") + + # Check if this chunk has search_results in provider_specific_fields + if hasattr(chunk, "choices") and chunk.choices: + for choice in chunk.choices: + if hasattr(choice, "delta") and choice.delta: + provider_fields = getattr(choice.delta, "provider_specific_fields", None) + if provider_fields and "search_results" in provider_fields: + search_results = provider_fields["search_results"] + print(f"Found search_results in streaming chunk: {len(search_results)} results") + + # Verify structure + assert search_results is not None + assert len(search_results) > 0 + + first_search_result = search_results[0] + assert "object" in first_search_result + assert first_search_result["object"] == "vector_store.search_results.page" + assert "data" in first_search_result + assert len(first_search_result["data"]) > 0 + + search_results_found = True + + print(f"Total chunks received: {len(chunks)}") + assert len(chunks) > 0 + assert search_results_found, "search_results should be present in streaming chunks" @pytest.mark.asyncio diff --git a/tests/test_litellm/proxy/auth/test_route_checks.py b/tests/test_litellm/proxy/auth/test_route_checks.py index 0c72c8bb1630..7d00b812a5cc 100644 --- a/tests/test_litellm/proxy/auth/test_route_checks.py +++ b/tests/test_litellm/proxy/auth/test_route_checks.py @@ -156,6 +156,31 @@ def test_virtual_key_llm_api_route_includes_passthrough_prefix(route): assert result is True +@pytest.mark.parametrize( + "route", + [ + "/v1beta/models/gemini-2.5-flash:countTokens", + "/v1beta/models/gemini-2.0-flash:generateContent", + "/v1beta/models/gemini-1.5-pro:streamGenerateContent", + "/models/gemini-2.5-flash:countTokens", + "/models/gemini-2.0-flash:generateContent", + "/models/gemini-1.5-pro:streamGenerateContent", + ], +) +def test_virtual_key_llm_api_routes_allows_google_routes(route): + """ + Test that virtual keys with llm_api_routes permission can access Google AI Studio routes. 
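+
+    These routes are permitted because google_routes is now part of
+    LiteLLMRoutes.llm_api_routes (litellm/proxy/_types.py).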
+ """ + + valid_token = UserAPIKeyAuth(user_id="test_user", allowed_routes=["llm_api_routes"]) + + result = RouteChecks.is_virtual_key_allowed_to_call_route( + route=route, valid_token=valid_token + ) + + assert result is True + + def test_virtual_key_allowed_routes_with_multiple_litellm_routes_member_names(): """Test that virtual key works with multiple LiteLLMRoutes member names in allowed_routes""" diff --git a/ui/litellm-dashboard/src/components/chat_ui/ChatUI.tsx b/ui/litellm-dashboard/src/components/chat_ui/ChatUI.tsx index ab288c369838..680f66702bc3 100644 --- a/ui/litellm-dashboard/src/components/chat_ui/ChatUI.tsx +++ b/ui/litellm-dashboard/src/components/chat_ui/ChatUI.tsx @@ -37,6 +37,7 @@ import ChatImageRenderer from "./ChatImageRenderer"; import { createChatMultimodalMessage, createChatDisplayMessage } from "./ChatImageUtils"; import SessionManagement from "./SessionManagement"; import MCPEventsDisplay, { MCPEvent } from "./MCPEventsDisplay"; +import { SearchResultsDisplay } from "./SearchResultsDisplay"; import { ApiOutlined, KeyOutlined, @@ -436,6 +437,25 @@ const ChatUI: React.FC = ({ accessToken, token, userRole, userID, d }); }; + const updateSearchResults = (searchResults: any[]) => { + console.log("Received search results:", searchResults); + setChatHistory((prevHistory) => { + const lastMessage = prevHistory[prevHistory.length - 1]; + + if (lastMessage && lastMessage.role === "assistant") { + console.log("Updating message with search results"); + const updatedMessage = { + ...lastMessage, + searchResults, + }; + + return [...prevHistory.slice(0, prevHistory.length - 1), updatedMessage]; + } + + return prevHistory; + }); + }; + const handleResponseId = (responseId: string) => { console.log("Received response ID for session management:", responseId); if (useApiSessionManagement) { @@ -687,6 +707,7 @@ const ChatUI: React.FC = ({ accessToken, token, userRole, userID, d selectedGuardrails.length > 0 ? selectedGuardrails : undefined, selectedMCPTools, // Pass the selected tools array updateChatImageUI, // Pass the image callback + updateSearchResults, // Pass the search results callback ); } else if (endpointType === EndpointType.IMAGE) { // For image generation @@ -1101,6 +1122,11 @@ const ChatUI: React.FC = ({ accessToken, token, userRole, userID, d )} + {/* Show search results at the start of assistant messages */} + {message.role === "assistant" && message.searchResults && ( + + )} +
diff --git a/ui/litellm-dashboard/src/components/chat_ui/SearchResultsDisplay.tsx b/ui/litellm-dashboard/src/components/chat_ui/SearchResultsDisplay.tsx
new file mode 100644
--- /dev/null
+++ b/ui/litellm-dashboard/src/components/chat_ui/SearchResultsDisplay.tsx
@@ -0,0 +1,90 @@
+import React, { useState } from "react";
+import { VectorStoreSearchResponse } from "./types";
+
+interface SearchResultsDisplayProps {
+  searchResults: VectorStoreSearchResponse[];
+}
+
+export function SearchResultsDisplay({ searchResults }: SearchResultsDisplayProps) {
+  const [isExpanded, setIsExpanded] = useState(false);
+  const [expandedResults, setExpandedResults] = useState<Record<string, boolean>>({});
+
+  if (!searchResults || searchResults.length === 0) {
+    return null;
+  }
+
+  const toggleResult = (pageIndex: number, resultIndex: number) => {
+    const key = `${pageIndex}-${resultIndex}`;
+    setExpandedResults((prev) => ({
+      ...prev,
+      [key]: !prev[key],
+    }));
+  };
+
+  const totalResults = searchResults.reduce((sum, page) => sum + page.data.length, 0);
+
+  return (
+    <div>
+      {/* Collapsible header showing the total number of vector store results */}
+      <button onClick={() => setIsExpanded(!isExpanded)}>
+        Sources ({totalResults})
+      </button>
+
+      {isExpanded && (
+        <div>
+          {searchResults.map((resultPage, pageIndex) => (
+            <div key={pageIndex}>
+              <div>
+                <span>Query:</span>
+                <span>"{resultPage.search_query}"</span>
+                <span>
+                  {resultPage.data.length} result{resultPage.data.length !== 1 ? 's' : ''}
+                </span>
+              </div>
+
+              <div>
+                {resultPage.data.map((result, resultIndex) => {
+                  const isResultExpanded = expandedResults[`${pageIndex}-${resultIndex}`] || false;
+
+                  return (
+                    <div key={resultIndex}>
+                      <div onClick={() => toggleResult(pageIndex, resultIndex)}>
+                        <span>
+                          {result.filename || result.file_id || `Result ${resultIndex + 1}`}
+                        </span>
+                        <span>{result.score.toFixed(3)}</span>
+                      </div>
+
+                      {isResultExpanded && (
+                        <div>
+                          {result.content.map((content, contentIndex) => (
+                            <div key={contentIndex}>{content.text}</div>
+                          ))}
+
+                          {result.attributes && Object.keys(result.attributes).length > 0 && (
+                            <div>
+                              <div>Metadata:</div>
+                              <div>
+                                {Object.entries(result.attributes).map(([key, value]) => (
+                                  <div key={key}>
+                                    <span>{key}:</span>
+                                    <span>{String(value)}</span>
+                                  </div>
+                                ))}
+                              </div>
+                            </div>
+                          )}
+                        </div>
+                      )}
+                    </div>
+                  );
+                })}
+              </div>
+            </div>
+          ))}
+        </div>
+      )}
+    </div>
+ ); +} + diff --git a/ui/litellm-dashboard/src/components/chat_ui/llm_calls/chat_completion.tsx b/ui/litellm-dashboard/src/components/chat_ui/llm_calls/chat_completion.tsx index 44aea48986bb..7baad24e48c4 100644 --- a/ui/litellm-dashboard/src/components/chat_ui/llm_calls/chat_completion.tsx +++ b/ui/litellm-dashboard/src/components/chat_ui/llm_calls/chat_completion.tsx @@ -1,6 +1,7 @@ import openai from "openai"; import { ChatCompletionMessageParam } from "openai/resources/chat/completions"; import { TokenUsage } from "../ResponseMetrics"; +import { VectorStoreSearchResponse } from "../types"; import { getProxyBaseUrl } from "@/components/networking"; export async function makeOpenAIChatCompletionRequest( @@ -18,6 +19,7 @@ export async function makeOpenAIChatCompletionRequest( guardrails?: string[], selectedMCPTools?: string[], onImageGenerated?: (imageUrl: string, model?: string) => void, + onSearchResults?: (searchResults: VectorStoreSearchResponse[]) => void, ) { // base url should be the current base_url const isLocal = process.env.NODE_ENV === "development"; @@ -127,6 +129,12 @@ export async function makeOpenAIChatCompletionRequest( fullReasoningContent += reasoningContent; } + // Check for search results in provider_specific_fields + if (delta && delta.provider_specific_fields?.search_results && onSearchResults) { + console.log("Search results found:", delta.provider_specific_fields.search_results); + onSearchResults(delta.provider_specific_fields.search_results); + } + // Check for usage data using type assertion const chunkWithUsage = chunk as any; if (chunkWithUsage.usage && onUsageData) { diff --git a/ui/litellm-dashboard/src/components/chat_ui/types.ts b/ui/litellm-dashboard/src/components/chat_ui/types.ts index 454256e56cb1..e8a98473cf40 100644 --- a/ui/litellm-dashboard/src/components/chat_ui/types.ts +++ b/ui/litellm-dashboard/src/components/chat_ui/types.ts @@ -56,6 +56,20 @@ export interface StreamingResponse { usage?: Usage; } +export interface VectorStoreSearchResult { + score: number; + content: Array<{ text: string; type: string }>; + file_id?: string; + filename?: string; + attributes?: Record; +} + +export interface VectorStoreSearchResponse { + object: string; + search_query: string; + data: VectorStoreSearchResult[]; +} + export interface MessageType { role: string; content: string | MultimodalContent[]; @@ -75,6 +89,7 @@ export interface MessageType { url: string; detail: string; }; + searchResults?: VectorStoreSearchResponse[]; } export interface MultimodalContent {