213 changes: 213 additions & 0 deletions docs/my-website/docs/completion/knowledgebase.md
@@ -412,6 +412,219 @@ This is sent to: `https://bedrock-agent-runtime.{aws_region}.amazonaws.com/knowl

This process happens automatically whenever you include the `vector_store_ids` parameter in your request.

## Accessing Search Results (Citations)

When using vector stores, LiteLLM automatically returns search results in `provider_specific_fields`. This allows you to show users citations for the AI's response.

### Key Concept

Search results are always returned in `response.choices[0].message.provider_specific_fields["search_results"]`.

For streaming responses, the results appear in `chunk.choices[0].delta.provider_specific_fields["search_results"]` on the **final chunk**, where `finish_reason == "stop"`.
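
Because `provider_specific_fields` is an extra field rather than part of the typed OpenAI response objects, a defensive read is safest. A minimal sketch (the helper name is illustrative, not a LiteLLM or OpenAI API):

```python
from typing import Any, Dict, List


def get_search_results(message_or_delta: Any) -> List[Dict[str, Any]]:
    """Safely read search results off a chat message or a streaming delta."""
    fields = getattr(message_or_delta, "provider_specific_fields", None) or {}
    return fields.get("search_results", [])
```

The SDK examples below do the same read inline.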

### Non-Streaming Example


**Non-Streaming Response with search results:**

```json
{
  "id": "chatcmpl-abc123",
  "choices": [{
    "index": 0,
    "message": {
      "role": "assistant",
      "content": "LiteLLM is a platform...",
      "provider_specific_fields": {
        "search_results": [{
          "search_query": "What is litellm?",
          "data": [{
            "score": 0.95,
            "content": [{"text": "...", "type": "text"}],
            "filename": "litellm-docs.md",
            "file_id": "doc-123"
          }]
        }]
      }
    },
    "finish_reason": "stop"
  }]
}
```

<Tabs>
<TabItem value="python-sdk" label="Python SDK">

```python
from openai import OpenAI

client = OpenAI(
base_url="http://localhost:4000",
api_key="your-litellm-api-key"
)

response = client.chat.completions.create(
model="claude-3-5-sonnet",
messages=[{"role": "user", "content": "What is litellm?"}],
tools=[{"type": "file_search", "vector_store_ids": ["T37J8R4WTM"]}]
)

# Get AI response
print(response.choices[0].message.content)

# Get search results (citations)
search_results = response.choices[0].message.provider_specific_fields.get("search_results", [])

for result_page in search_results:
for idx, item in enumerate(result_page['data'], 1):
print(f"[{idx}] {item.get('filename', 'Unknown')} (score: {item['score']:.2f})")
```

</TabItem>

<TabItem value="typescript" label="TypeScript SDK">

```typescript
import OpenAI from 'openai';

const client = new OpenAI({
  baseURL: 'http://localhost:4000',
  apiKey: process.env.LITELLM_API_KEY
});

const response = await client.chat.completions.create({
  model: 'claude-3-5-sonnet',
  messages: [{ role: 'user', content: 'What is litellm?' }],
  tools: [{ type: 'file_search', vector_store_ids: ['T37J8R4WTM'] }]
});

// Get AI response
console.log(response.choices[0].message.content);

// Get search results (citations)
const message = response.choices[0].message as any;
const searchResults = message.provider_specific_fields?.search_results || [];

searchResults.forEach((page: any) => {
  page.data.forEach((item: any, idx: number) => {
    console.log(`[${idx + 1}] ${item.filename || 'Unknown'} (${item.score.toFixed(2)})`);
  });
});
```

</TabItem>
</Tabs>

### Streaming Example

**Streaming Response with search results (final chunk):**

```json
{
  "id": "chatcmpl-abc123",
  "choices": [{
    "index": 0,
    "delta": {
      "provider_specific_fields": {
        "search_results": [{
          "search_query": "What is litellm?",
          "data": [{
            "score": 0.95,
            "content": [{"text": "...", "type": "text"}],
            "filename": "litellm-docs.md",
            "file_id": "doc-123"
          }]
        }]
      }
    },
    "finish_reason": "stop"
  }]
}
```

<Tabs>
<TabItem value="python-sdk" label="Python SDK">

```python
from openai import OpenAI

client = OpenAI(
base_url="http://localhost:4000",
api_key="your-litellm-api-key"
)

stream = client.chat.completions.create(
model="claude-3-5-sonnet",
messages=[{"role": "user", "content": "What is litellm?"}],
tools=[{"type": "file_search", "vector_store_ids": ["T37J8R4WTM"]}],
stream=True
)

for chunk in stream:
# Stream content
if chunk.choices[0].delta.content:
print(chunk.choices[0].delta.content, end="", flush=True)

# Get citations in final chunk
if chunk.choices[0].finish_reason == "stop":
search_results = getattr(chunk.choices[0].delta, 'provider_specific_fields', {}).get('search_results', [])
if search_results:
print("\n\nSources:")
for page in search_results:
for idx, item in enumerate(page['data'], 1):
print(f" [{idx}] {item.get('filename', 'Unknown')} ({item['score']:.2f})")
```

</TabItem>

<TabItem value="typescript" label="TypeScript SDK">

```typescript
import OpenAI from 'openai';

const client = new OpenAI({
  baseURL: 'http://localhost:4000',
  apiKey: process.env.LITELLM_API_KEY
});

const stream = await client.chat.completions.create({
  model: 'claude-3-5-sonnet',
  messages: [{ role: 'user', content: 'What is litellm?' }],
  tools: [{ type: 'file_search', vector_store_ids: ['T37J8R4WTM'] }],
  stream: true
});

for await (const chunk of stream) {
  // Stream content
  if (chunk.choices[0]?.delta?.content) {
    process.stdout.write(chunk.choices[0].delta.content);
  }

  // Get citations in final chunk
  if (chunk.choices[0]?.finish_reason === 'stop') {
    const searchResults = (chunk.choices[0].delta as any).provider_specific_fields?.search_results || [];
    if (searchResults.length > 0) {
      console.log('\n\nSources:');
      searchResults.forEach((page: any) => {
        page.data.forEach((item: any, idx: number) => {
          console.log(` [${idx + 1}] ${item.filename || 'Unknown'} (${item.score.toFixed(2)})`);
        });
      });
    }
  }
}
```

</TabItem>
</Tabs>

### Search Result Fields

| Field | Type | Description |
|-------|------|-------------|
| `search_query` | string | The query used to search the vector store |
| `data` | array | Array of search results |
| `data[].score` | float | Relevance score (0-1, higher is more relevant) |
| `data[].content` | array | Content chunks with `text` and `type` |
| `data[].filename` | string | Name of the source file (optional) |
| `data[].file_id` | string | Identifier for the source file (optional) |
| `data[].attributes` | object | Provider-specific metadata (optional) |
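
To illustrate how these fields fit together, here is a minimal sketch that renders `search_results` into a plain-text source list (the function name and output format are illustrative, not part of LiteLLM):

```python
from typing import Any, Dict, List


def format_citations(search_results: List[Dict[str, Any]]) -> str:
    """Render search results as a numbered source list."""
    lines = []
    for page in search_results:
        lines.append(f"Query: {page.get('search_query', '')}")
        for idx, item in enumerate(page.get("data", []), 1):
            name = item.get("filename") or item.get("file_id") or "Unknown"
            lines.append(f"  [{idx}] {name} (score: {item.get('score', 0.0):.2f})")
    return "\n".join(lines)
```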

## API Reference

### LiteLLM Completion Knowledge Base Parameters
9 changes: 8 additions & 1 deletion docs/my-website/docs/providers/bedrock_vector_store.md
@@ -138,7 +138,14 @@ print(response.choices[0].message.content)
</Tabs>


Futher Reading Vector Stores:
## Accessing Search Results

See how to access vector store search results in your response:
- [Accessing Search Results (Non-Streaming & Streaming)](../completion/knowledgebase#accessing-search-results-citations)

## Further Reading

Vector Stores:
- [Always on Vector Stores](https://docs.litellm.ai/docs/completion/knowledgebase#always-on-for-a-model)
- [Listing available vector stores on litellm proxy](https://docs.litellm.ai/docs/completion/knowledgebase#listing-available-vector-stores)
- [How LiteLLM Vector Stores Work](https://docs.litellm.ai/docs/completion/knowledgebase#how-it-works)
2 changes: 1 addition & 1 deletion docs/my-website/docs/proxy/deploy.md
@@ -788,7 +788,7 @@ docker run --name litellm-proxy \
## Platform-specific Guide

<Tabs>
<TabItem value="AWS ECS" label="AWS ECS - Elastic Container Service>
<TabItem value="AWS ECS" label="AWS ECS - Elastic Container Service">

### Terraform-based ECS Deployment

4 changes: 2 additions & 2 deletions docs/my-website/release_notes/v1.78.0-stable/index.md
@@ -40,15 +40,15 @@ import TabItem from '@theme/TabItem';
docker run \
-e STORE_MODEL_IN_DB=True \
-p 4000:4000 \
ghcr.io/berriai/litellm:v1.78.0.rc.1
ghcr.io/berriai/litellm:v1.78.0.rc.2
```

</TabItem>

<TabItem value="pip" label="Pip">

``` showLineNumbers title="pip install litellm"
pip install litellm==1.78.0.rc.1
pip install litellm==1.78.0.rc.2
```

</TabItem>
13 changes: 13 additions & 0 deletions litellm/integrations/custom_logger.py
@@ -204,6 +204,19 @@ async def async_post_call_success_deployment_hook(
"""
pass

async def async_post_call_streaming_deployment_hook(
self,
request_data: dict,
response_chunk: Any,
call_type: Optional[CallTypes],
) -> Optional[Any]:
"""
Allow modifying streaming chunks just before they're returned to the user.

This is called for each streaming chunk in the response.
"""
pass

#### Fallback Events - router/proxy only ####
async def log_model_group_rate_limit_error(
self, exception: Exception, original_model_group: Optional[str], kwargs: dict
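
For context, a minimal sketch of how a `CustomLogger` subclass might override the new `async_post_call_streaming_deployment_hook` (the subclass name is hypothetical and the `CallTypes` import path is an assumption, not part of this diff):

```python
from typing import Any, Optional

from litellm.integrations.custom_logger import CustomLogger
from litellm.types.utils import CallTypes  # assumed import path for CallTypes


class ChunkInspector(CustomLogger):  # hypothetical subclass
    async def async_post_call_streaming_deployment_hook(
        self,
        request_data: dict,
        response_chunk: Any,
        call_type: Optional[CallTypes],
    ) -> Optional[Any]:
        # Inspect or modify each streaming chunk before it reaches the user;
        # returning the chunk unchanged leaves the stream as-is.
        return response_chunk
```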