Merged

Changes from all commits · 31 commits
e820162
Implement fix for thinking_blocks and converse API calls
lcfyi Oct 5, 2025
01d672f
Add thinking literal
lcfyi Oct 5, 2025
f4557be
Fix mypy issues
lcfyi Oct 5, 2025
af6b425
Type fix for redacted thinking
lcfyi Oct 5, 2025
c4d2c0a
Add voyage model integration in sagemaker
Sameerlite Oct 9, 2025
aeceb19
Add config file logic
Sameerlite Oct 9, 2025
aed48af
Use already existing voyage transformation
Sameerlite Oct 10, 2025
6cff936
refactor code as per comments
Sameerlite Oct 13, 2025
8d6877b
fix merge error
Sameerlite Oct 13, 2025
04fdc48
refactor code as per comments
Sameerlite Oct 13, 2025
349635f
refactor code as per comments
Sameerlite Oct 13, 2025
a5ae176
UI new build
ishaan-jaff Oct 11, 2025
8517e87
[Fix] router - regression when adding/removing models (#15451)
AlexsanderHamir Oct 12, 2025
02389ff
fix(prometheus): Fix Prometheus metric collection in a multi-workers …
LoadingZhang Oct 12, 2025
3d4b956
Add tiered pricing and cost calculation for xai
Sameerlite Oct 9, 2025
71fc43d
Use generic cost calculator
Sameerlite Oct 10, 2025
d526b41
Resolve conflicts in generated HTML files
Sameerlite Oct 13, 2025
1b3d998
Remove penalty params as supported params for gemini preview model (#…
Sameerlite Oct 13, 2025
b64b540
fix conversion of thinking block
Sameerlite Oct 13, 2025
8f36e80
add application level encryption in SQS (#15512)
deepanshululla Oct 14, 2025
a5ca5ff
[Feat] Bedrock Knowledgebase - return search_response when using /cha…
ishaan-jaff Oct 14, 2025
825e63d
[Feat] Add dynamic rate limits on LiteLLM Gateway (#15518)
ishaan-jaff Oct 14, 2025
6543cb2
Add google rerank endpoint
Sameerlite Oct 14, 2025
5da242e
Add docs
Sameerlite Oct 14, 2025
acb2d2d
fix mypy error
Sameerlite Oct 14, 2025
39e8e40
fix mypy and lint errors
Sameerlite Oct 14, 2025
edd05d9
Add haiku 4.5 integration
Sameerlite Oct 17, 2025
a7a4bee
Add haiku 4.5 integration for other regions as well
Sameerlite Oct 17, 2025
deed425
Handle citation field correctly
Sameerlite Oct 16, 2025
1b173d0
Fix filtering headers for signature calcs
Sameerlite Oct 16, 2025
5a637cc
Add haiku 4.5 integration (#15650)
Sameerlite Oct 17, 2025
213 changes: 213 additions & 0 deletions docs/my-website/docs/completion/knowledgebase.md
@@ -412,6 +412,219 @@ This is sent to: `https://bedrock-agent-runtime.{aws_region}.amazonaws.com/knowl

This process happens automatically whenever you include the `vector_store_ids` parameter in your request.
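
For reference, here's a minimal sketch of such a request through the Python SDK; the model alias and vector store ID are placeholders, and `vector_store_ids` is the same parameter described above:

```python
import litellm

# Minimal sketch: "claude-3-5-sonnet" and "T37J8R4WTM" are placeholders for
# a model and vector store registered on your gateway.
response = litellm.completion(
    model="claude-3-5-sonnet",
    messages=[{"role": "user", "content": "What is litellm?"}],
    vector_store_ids=["T37J8R4WTM"],  # triggers the automatic retrieval step
)
```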

## Accessing Search Results (Citations)

When using vector stores, LiteLLM automatically returns search results in `provider_specific_fields`, so you can show users the citations behind the AI's response.

### Key Concept

Search results are always available at `response.choices[0].message.provider_specific_fields["search_results"]`.

For streaming responses, the results arrive in the **final chunk**, when `finish_reason == "stop"`.

### Non-Streaming Example


**Non-Streaming Response with search results:**

```json
{
  "id": "chatcmpl-abc123",
  "choices": [{
    "index": 0,
    "message": {
      "role": "assistant",
      "content": "LiteLLM is a platform...",
      "provider_specific_fields": {
        "search_results": [{
          "search_query": "What is litellm?",
          "data": [{
            "score": 0.95,
            "content": [{"text": "...", "type": "text"}],
            "filename": "litellm-docs.md",
            "file_id": "doc-123"
          }]
        }]
      }
    },
    "finish_reason": "stop"
  }]
}
```

<Tabs>
<TabItem value="python-sdk" label="Python SDK">

```python
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:4000",
    api_key="your-litellm-api-key"
)

response = client.chat.completions.create(
    model="claude-3-5-sonnet",
    messages=[{"role": "user", "content": "What is litellm?"}],
    tools=[{"type": "file_search", "vector_store_ids": ["T37J8R4WTM"]}]
)

# Get AI response
print(response.choices[0].message.content)

# Get search results (citations)
search_results = response.choices[0].message.provider_specific_fields.get("search_results", [])

for result_page in search_results:
    for idx, item in enumerate(result_page['data'], 1):
        print(f"[{idx}] {item.get('filename', 'Unknown')} (score: {item['score']:.2f})")
```

</TabItem>

<TabItem value="typescript" label="TypeScript SDK">

```typescript
import OpenAI from 'openai';

const client = new OpenAI({
  baseURL: 'http://localhost:4000',
  apiKey: process.env.LITELLM_API_KEY
});

const response = await client.chat.completions.create({
  model: 'claude-3-5-sonnet',
  messages: [{ role: 'user', content: 'What is litellm?' }],
  tools: [{ type: 'file_search', vector_store_ids: ['T37J8R4WTM'] }]
});

// Get AI response
console.log(response.choices[0].message.content);

// Get search results (citations)
const message = response.choices[0].message as any;
const searchResults = message.provider_specific_fields?.search_results || [];

searchResults.forEach((page: any) => {
  page.data.forEach((item: any, idx: number) => {
    console.log(`[${idx + 1}] ${item.filename || 'Unknown'} (${item.score.toFixed(2)})`);
  });
});
```

</TabItem>
</Tabs>

### Streaming Example

**Streaming Response with search results (final chunk):**

```json
{
  "id": "chatcmpl-abc123",
  "choices": [{
    "index": 0,
    "delta": {
      "provider_specific_fields": {
        "search_results": [{
          "search_query": "What is litellm?",
          "data": [{
            "score": 0.95,
            "content": [{"text": "...", "type": "text"}],
            "filename": "litellm-docs.md",
            "file_id": "doc-123"
          }]
        }]
      }
    },
    "finish_reason": "stop"
  }]
}
```

<Tabs>
<TabItem value="python-sdk" label="Python SDK">

```python
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:4000",
    api_key="your-litellm-api-key"
)

stream = client.chat.completions.create(
    model="claude-3-5-sonnet",
    messages=[{"role": "user", "content": "What is litellm?"}],
    tools=[{"type": "file_search", "vector_store_ids": ["T37J8R4WTM"]}],
    stream=True
)

for chunk in stream:
    # Stream content
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)

    # Get citations in final chunk
    if chunk.choices[0].finish_reason == "stop":
        # provider_specific_fields can be None on some chunks, so guard it
        fields = getattr(chunk.choices[0].delta, "provider_specific_fields", None) or {}
        search_results = fields.get("search_results", [])
        if search_results:
            print("\n\nSources:")
            for page in search_results:
                for idx, item in enumerate(page['data'], 1):
                    print(f"  [{idx}] {item.get('filename', 'Unknown')} ({item['score']:.2f})")
```

</TabItem>

<TabItem value="typescript" label="TypeScript SDK">

```typescript
import OpenAI from 'openai';

const client = new OpenAI({
  baseURL: 'http://localhost:4000',
  apiKey: process.env.LITELLM_API_KEY
});

const stream = await client.chat.completions.create({
  model: 'claude-3-5-sonnet',
  messages: [{ role: 'user', content: 'What is litellm?' }],
  tools: [{ type: 'file_search', vector_store_ids: ['T37J8R4WTM'] }],
  stream: true
});

for await (const chunk of stream) {
  // Stream content
  if (chunk.choices[0]?.delta?.content) {
    process.stdout.write(chunk.choices[0].delta.content);
  }

  // Get citations in final chunk
  if (chunk.choices[0]?.finish_reason === 'stop') {
    const searchResults = (chunk.choices[0].delta as any).provider_specific_fields?.search_results || [];
    if (searchResults.length > 0) {
      console.log('\n\nSources:');
      searchResults.forEach((page: any) => {
        page.data.forEach((item: any, idx: number) => {
          console.log(`  [${idx + 1}] ${item.filename || 'Unknown'} (${item.score.toFixed(2)})`);
        });
      });
    }
  }
}
```

</TabItem>
</Tabs>

### Search Result Fields

| Field | Type | Description |
|-------|------|-------------|
| `search_query` | string | The query used to search the vector store |
| `data` | array | Array of search results |
| `data[].score` | float | Relevance score (0-1, higher is more relevant) |
| `data[].content` | array | Content chunks with `text` and `type` |
| `data[].filename` | string | Name of the source file (optional) |
| `data[].file_id` | string | Identifier for the source file (optional) |
| `data[].attributes` | object | Provider-specific metadata (optional) |
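
For convenience, here's an illustrative helper (not part of LiteLLM) that flattens the structure above into printable citation lines, relying only on the fields documented in this table:

```python
from typing import Any, Dict, List

def format_citations(search_results: List[Dict[str, Any]]) -> List[str]:
    """Flatten `search_results` into printable citation lines.

    Illustrative only: uses just the documented fields
    (score, filename, file_id, content).
    """
    citations: List[str] = []
    for page in search_results:
        for item in page.get("data", []):
            label = item.get("filename") or item.get("file_id") or "Unknown source"
            # Take the first text chunk as a short preview, if present
            snippet = next(
                (block.get("text", "") for block in item.get("content", [])
                 if block.get("type") == "text"),
                "",
            )
            citations.append(f"{label} (score: {item.get('score', 0.0):.2f}) {snippet[:80]}")
    return citations
```

Pass it the `search_results` list from either the non-streaming or the streaming example above.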

## API Reference

### LiteLLM Completion Knowledge Base Parameters
9 changes: 8 additions & 1 deletion docs/my-website/docs/providers/bedrock_vector_store.md
@@ -138,7 +138,14 @@ print(response.choices[0].message.content)
</Tabs>


Further Reading Vector Stores:
## Accessing Search Results

See how to access vector store search results in your response:
- [Accessing Search Results (Non-Streaming & Streaming)](../completion/knowledgebase#accessing-search-results-citations)

## Further Reading

Vector Stores:
- [Always on Vector Stores](https://docs.litellm.ai/docs/completion/knowledgebase#always-on-for-a-model)
- [Listing available vector stores on litellm proxy](https://docs.litellm.ai/docs/completion/knowledgebase#listing-available-vector-stores)
- [How LiteLLM Vector Stores Work](https://docs.litellm.ai/docs/completion/knowledgebase#how-it-works)
100 changes: 99 additions & 1 deletion docs/my-website/docs/providers/vertex.md
@@ -12,7 +12,7 @@ import TabItem from '@theme/TabItem';
| Provider Route on LiteLLM | `vertex_ai/` |
| Link to Provider Doc | [Vertex AI ↗](https://cloud.google.com/vertex-ai) |
| Base URL | 1. Regional endpoints<br/>`https://{vertex_location}-aiplatform.googleapis.com/`<br/>2. Global endpoints (limited availability)<br/>`https://aiplatform.googleapis.com/`|
| Supported Operations | [`/chat/completions`](#sample-usage), `/completions`, [`/embeddings`](#embedding-models), [`/audio/speech`](#text-to-speech-apis), [`/fine_tuning`](#fine-tuning-apis), [`/batches`](#batch-apis), [`/files`](#batch-apis), [`/images`](#image-generation-models) |
| Supported Operations | [`/chat/completions`](#sample-usage), `/completions`, [`/embeddings`](#embedding-models), [`/audio/speech`](#text-to-speech-apis), [`/fine_tuning`](#fine-tuning-apis), [`/batches`](#batch-apis), [`/files`](#batch-apis), [`/images`](#image-generation-models), [`/rerank`](#rerank-api) |


<br />
@@ -3114,3 +3114,101 @@ Once that's done, when you deploy the new container in the Google Cloud Run serv


s/o @[Darien Kindlund](https://www.linkedin.com/in/kindlund/) for this tutorial

## **Rerank API**

Vertex AI supports reranking through the Discovery Engine API, providing semantic ranking capabilities for document retrieval.

### Setup

Set your Google Cloud project ID:

```bash
export VERTEXAI_PROJECT="your-project-id"
```

### Usage

```python
from litellm import rerank

documents = [
    "Gemini is a cutting edge large language model created by Google.",
    "The Gemini zodiac symbol often depicts two figures standing side-by-side.",
    "Gemini is a constellation that can be seen in the night sky.",
]

# Using the latest model (recommended)
response = rerank(
    model="vertex_ai/semantic-ranker-default@latest",
    query="What is Google Gemini?",
    documents=documents,
    top_n=2,
    return_documents=True,  # Set to False for ID-only responses
)

# Using a specific model version
response_v003 = rerank(
    model="vertex_ai/semantic-ranker-default-003",
    query="What is Google Gemini?",
    documents=documents,
    top_n=2,
)

print(response.results)
```

### Parameters

| Parameter | Type | Description |
|-----------|------|-------------|
| `model` | string | Model name (e.g., `vertex_ai/semantic-ranker-default@latest`) |
| `query` | string | Search query |
| `documents` | list | Documents to rank |
| `top_n` | int | Number of top results to return |
| `return_documents` | bool | Return full content (True) or IDs only (False) |
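
To make the output shape concrete, here's a hedged sketch of reading the ranked results; it assumes the Cohere-style result entries (`index` into the input list plus `relevance_score`) that LiteLLM normalizes rerank responses to:

```python
# Hedged sketch: assumes each entry in response.results carries "index"
# (position in the input `documents` list) and "relevance_score".
for result in response.results:
    original_doc = documents[result["index"]]
    print(f"{result['relevance_score']:.3f}  {original_doc}")
```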

### Supported Models

- `semantic-ranker-default@latest`
- `semantic-ranker-fast@latest`
- `semantic-ranker-default-003`
- `semantic-ranker-default-002`

For detailed model specifications, see the [Google Cloud ranking API documentation](https://cloud.google.com/generative-ai-app-builder/docs/ranking#rank_or_rerank_a_set_of_records_according_to_a_query).

### Proxy Usage

Add to your `config.yaml`:

```yaml
model_list:
  - model_name: semantic-ranker-default@latest
    litellm_params:
      model: vertex_ai/semantic-ranker-default@latest
      vertex_ai_project: "your-project-id"
      vertex_ai_location: "us-central1"
      vertex_ai_credentials: "path/to/service-account.json"
```

Start the proxy:

```bash
litellm --config /path/to/config.yaml
```

Test with curl:

```bash
curl http://0.0.0.0:4000/rerank \
  -H "Authorization: Bearer sk-1234" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "semantic-ranker-default@latest",
    "query": "What is Google Gemini?",
    "documents": [
      "Gemini is a cutting edge large language model created by Google.",
      "The Gemini zodiac symbol often depicts two figures standing side-by-side.",
      "Gemini is a constellation that can be seen in the night sky."
    ],
    "top_n": 2
  }'
```
3 changes: 2 additions & 1 deletion docs/my-website/docs/rerank.md
@@ -121,4 +121,5 @@ curl http://0.0.0.0:4000/rerank \
| HuggingFace| [Usage](../docs/providers/huggingface_rerank) |
| Infinity| [Usage](../docs/providers/infinity) |
| vLLM| [Usage](../docs/providers/vllm#rerank-endpoint) |
| DeepInfra| [Usage](../docs/providers/deepinfra#rerank-endpoint) |
| Vertex AI| [Usage](../docs/providers/vertex#rerank-api) |
11 changes: 8 additions & 3 deletions enterprise/litellm_enterprise/integrations/prometheus.py
@@ -1,6 +1,7 @@
# used for /metrics endpoint on LiteLLM Proxy
#### What this does ####
# On success, log events to Prometheus
import os
import sys
from datetime import datetime, timedelta
from typing import (
@@ -2211,7 +2212,13 @@ def _mount_metrics_endpoint(premium_user: bool):
    )

    # Create metrics ASGI app
    metrics_app = make_asgi_app()
    if 'PROMETHEUS_MULTIPROC_DIR' in os.environ:
        from prometheus_client import CollectorRegistry, multiprocess
        registry = CollectorRegistry()
        multiprocess.MultiProcessCollector(registry)
        metrics_app = make_asgi_app(registry)
    else:
        metrics_app = make_asgi_app()

    # Mount the metrics app to the app
    app.mount("/metrics", metrics_app)
@@ -2354,15 +2361,13 @@ def get_custom_labels_from_tags(tags: List[str]) -> Dict[str, str]:
    }
    """

    from litellm.router_utils.pattern_match_deployments import PatternMatchRouter
    from litellm.types.integrations.prometheus import _sanitize_prometheus_label_name

    configured_tags = litellm.custom_prometheus_tags
    if configured_tags is None or len(configured_tags) == 0:
        return {}

    result: Dict[str, str] = {}
    pattern_router = PatternMatchRouter()

    for configured_tag in configured_tags:
        label_name = _sanitize_prometheus_label_name(f"tag_{configured_tag}")