Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
2c3561e
init S3VectorsRAGIngestion as a supported ingestion provider for RAG API
ishaan-jaff Jan 27, 2026
9e7b01f
test: TestRAGS3Vectors
ishaan-jaff Jan 27, 2026
fbd926e
init S3VectorsVectorStoreOptions
ishaan-jaff Jan 27, 2026
460b4f8
init s3 vectors
ishaan-jaff Jan 27, 2026
765ae3c
code clean up + QA
ishaan-jaff Jan 27, 2026
5f97c0c
fix: get_credentials
ishaan-jaff Jan 27, 2026
5da60d5
S3VectorsRAGIngestion
ishaan-jaff Jan 27, 2026
b385197
TestRAGS3Vectors
ishaan-jaff Jan 27, 2026
273b4b1
docs: AWS S3 Vectors
ishaan-jaff Jan 27, 2026
a3d795b
add asyncio QA checks
ishaan-jaff Jan 27, 2026
7aa11ec
fix: S3_VECTORS_DEFAULT_DIMENSION
ishaan-jaff Jan 27, 2026
fc4a484
init ui for bedrock s3 vectors
ishaan-jaff Jan 27, 2026
5c0a34a
fix add /search support for s3_vectors
ishaan-jaff Jan 27, 2026
b3eb50a
init atransform_search_vector_store_request
ishaan-jaff Jan 27, 2026
608dcc4
feat: S3VectorsVectorStoreConfig
ishaan-jaff Jan 27, 2026
2abad46
TestS3VectorsVectorStoreConfig
ishaan-jaff Jan 27, 2026
145b3ba
atransform_search_vector_store_request
ishaan-jaff Jan 27, 2026
f03f1fd
fix: S3VectorsVectorStoreConfig
ishaan-jaff Jan 27, 2026
1aa47fb
add validation for bucket name etc.
ishaan-jaff Jan 27, 2026
71d1b82
fix UI validation for s3 vector store
ishaan-jaff Jan 27, 2026
aba122f
init extract_text_from_pdf
ishaan-jaff Jan 27, 2026
51474e5
add pypdf
ishaan-jaff Jan 27, 2026
db5e183
Merge branch 'main' into litellm_add_rag_api_s3_vectors_on_ui
ishaan-jaff Jan 28, 2026
fb7943e
fix code QA checks
ishaan-jaff Jan 28, 2026
9cd5ebe
fix navbar
ishaan-jaff Jan 28, 2026
7c53173
init s3_vector.png
ishaan-jaff Jan 28, 2026
871fb26
fix QA code
ishaan-jaff Jan 28, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion litellm/llms/base_llm/vector_store/transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

from litellm.types.router import GenericLiteLLMParams
from litellm.types.vector_stores import (
BaseVectorStoreAuthCredentials,
VECTOR_STORE_OPENAI_PARAMS,
BaseVectorStoreAuthCredentials,
VectorStoreCreateOptionalRequestParams,
VectorStoreCreateResponse,
VectorStoreIndexEndpoints,
Expand Down Expand Up @@ -64,6 +64,30 @@ def transform_search_vector_store_request(

pass

async def atransform_search_vector_store_request(
    self,
    vector_store_id: str,
    query: Union[str, List[str]],
    vector_store_search_optional_params: VectorStoreSearchOptionalRequestParams,
    api_base: str,
    litellm_logging_obj: LiteLLMLoggingObj,
    litellm_params: dict,
) -> Tuple[str, Dict]:
    """
    Async counterpart to transform_search_vector_store_request.

    The base implementation simply delegates to the synchronous transform,
    so providers that perform no awaitable work need not override it.
    Providers that must await something (e.g. embedding generation)
    override this method with a genuinely asynchronous implementation.
    """
    url_and_body = self.transform_search_vector_store_request(
        vector_store_id=vector_store_id,
        query=query,
        vector_store_search_optional_params=vector_store_search_optional_params,
        api_base=api_base,
        litellm_logging_obj=litellm_logging_obj,
        litellm_params=litellm_params,
    )
    return url_and_body

@abstractmethod
def transform_search_vector_store_response(
self, response: httpx.Response, litellm_logging_obj: LiteLLMLoggingObj
Expand Down
2 changes: 1 addition & 1 deletion litellm/llms/bedrock/base_aws_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -1163,7 +1163,7 @@ def _filter_headers_for_aws_signature(self, headers: dict) -> dict:

def _sign_request(
self,
service_name: Literal["bedrock", "sagemaker", "bedrock-agentcore"],
service_name: Literal["bedrock", "sagemaker", "bedrock-agentcore", "s3vectors"],
headers: dict,
optional_params: dict,
request_data: dict,
Expand Down
36 changes: 25 additions & 11 deletions litellm/llms/custom_httpx/llm_http_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -7033,17 +7033,31 @@ async def async_vector_store_search_handler(
litellm_params=dict(litellm_params),
)

(
url,
request_body,
) = vector_store_provider_config.transform_search_vector_store_request(
vector_store_id=vector_store_id,
query=query,
vector_store_search_optional_params=vector_store_search_optional_params,
api_base=api_base,
litellm_logging_obj=logging_obj,
litellm_params=dict(litellm_params),
)
# Check if provider has async transform method
if hasattr(vector_store_provider_config, "atransform_search_vector_store_request"):
(
url,
request_body,
) = await vector_store_provider_config.atransform_search_vector_store_request(
vector_store_id=vector_store_id,
query=query,
vector_store_search_optional_params=vector_store_search_optional_params,
api_base=api_base,
litellm_logging_obj=logging_obj,
litellm_params=dict(litellm_params),
)
else:
(
url,
request_body,
) = vector_store_provider_config.transform_search_vector_store_request(
vector_store_id=vector_store_id,
query=query,
vector_store_search_optional_params=vector_store_search_optional_params,
api_base=api_base,
litellm_logging_obj=logging_obj,
litellm_params=dict(litellm_params),
)
all_optional_params: Dict[str, Any] = dict(litellm_params)
all_optional_params.update(vector_store_search_optional_params or {})
headers, signed_json_body = vector_store_provider_config.sign_request(
Expand Down
1 change: 1 addition & 0 deletions litellm/llms/s3_vectors/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# S3 Vectors LLM integration
1 change: 1 addition & 0 deletions litellm/llms/s3_vectors/vector_stores/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# S3 Vectors vector store integration
254 changes: 254 additions & 0 deletions litellm/llms/s3_vectors/vector_stores/transformation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,254 @@
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union

import httpx

from litellm.llms.base_llm.vector_store.transformation import BaseVectorStoreConfig
from litellm.llms.bedrock.base_aws_llm import BaseAWSLLM
from litellm.types.router import GenericLiteLLMParams
from litellm.types.vector_stores import (
VECTOR_STORE_OPENAI_PARAMS,
BaseVectorStoreAuthCredentials,
VectorStoreIndexEndpoints,
VectorStoreResultContent,
VectorStoreSearchOptionalRequestParams,
VectorStoreSearchResponse,
VectorStoreSearchResult,
)

if TYPE_CHECKING:
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
else:
LiteLLMLoggingObj = Any


class S3VectorsVectorStoreConfig(BaseVectorStoreConfig, BaseAWSLLM):
    """Vector store configuration for AWS S3 Vectors.

    Translates LiteLLM vector-store search calls into S3 Vectors
    ``QueryVectors`` requests. Query text is first embedded (via the
    ``embedding_model`` litellm param, default ``text-embedding-3-small``)
    and the resulting vector is sent to the S3 Vectors index. Requests are
    authenticated with AWS SigV4 signing (service name ``s3vectors``).

    Vector store creation is not yet implemented for this provider.
    """

    def __init__(self) -> None:
        BaseVectorStoreConfig.__init__(self)
        BaseAWSLLM.__init__(self)

    def get_auth_credentials(
        self, litellm_params: dict
    ) -> BaseVectorStoreAuthCredentials:
        # Auth is handled entirely through SigV4 request signing
        # (see sign_request), so no separate credentials are exposed here.
        return {}

    def get_vector_store_endpoints_by_type(self) -> VectorStoreIndexEndpoints:
        """Return the HTTP endpoints this provider supports, by access type."""
        return {
            "read": [("POST", "/QueryVectors")],
            "write": [],  # write/ingest goes through the RAG ingestion path
        }

    def get_supported_openai_params(
        self, model: str
    ) -> List[VECTOR_STORE_OPENAI_PARAMS]:
        return ["max_num_results"]

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        drop_params: bool,
    ) -> dict:
        """Map OpenAI-style search params to S3 Vectors request params."""
        for param, value in non_default_params.items():
            if param == "max_num_results":
                # S3 Vectors calls this "maxResults" in its API surface.
                optional_params["maxResults"] = value
        return optional_params

    def validate_environment(
        self, headers: dict, litellm_params: Optional[GenericLiteLLMParams]
    ) -> dict:
        """Ensure the request carries a JSON content type."""
        headers = headers or {}
        headers.setdefault("Content-Type", "application/json")
        return headers

    def get_complete_url(self, api_base: Optional[str], litellm_params: dict) -> str:
        """Build the regional S3 Vectors endpoint URL.

        Raises:
            ValueError: if ``aws_region_name`` is missing from litellm_params.
        """
        aws_region_name = litellm_params.get("aws_region_name")
        if not aws_region_name:
            raise ValueError("aws_region_name is required for S3 Vectors")
        return f"https://s3vectors.{aws_region_name}.api.aws"

    # ------------------------------------------------------------------
    # Private helpers shared by the sync and async search transforms.
    # ------------------------------------------------------------------

    @staticmethod
    def _resolve_bucket_and_index(
        vector_store_id: str, litellm_params: dict
    ) -> Tuple[str, str]:
        """Resolve ``(bucket_name, index_name)`` from the vector store id.

        Accepts either the combined ``'bucket_name:index_name'`` form, or a
        bare index name with ``vector_bucket_name`` supplied in litellm_params.

        Raises:
            ValueError: if neither form yields a usable bucket name.
        """
        if ":" in vector_store_id:
            bucket_name, index_name = vector_store_id.split(":", 1)
            return bucket_name, index_name

        bucket_name_from_params = litellm_params.get("vector_bucket_name")
        if not bucket_name_from_params or not isinstance(bucket_name_from_params, str):
            raise ValueError(
                "vector_store_id must be in format 'bucket_name:index_name' for S3 Vectors, "
                "or vector_bucket_name must be provided in litellm_params"
            )
        return bucket_name_from_params, vector_store_id

    @staticmethod
    def _normalize_query(query: Union[str, List[str]]) -> str:
        """Collapse a multi-part query into a single string for embedding."""
        if isinstance(query, list):
            return " ".join(query)
        return query

    @staticmethod
    def _build_query_request(
        bucket_name: str,
        index_name: str,
        query_embedding: List[float],
        vector_store_search_optional_params: VectorStoreSearchOptionalRequestParams,
        api_base: str,
    ) -> Tuple[str, Dict]:
        """Assemble the QueryVectors URL and JSON request body."""
        url = f"{api_base}/QueryVectors"
        # NOTE(review): map_openai_params maps max_num_results -> maxResults,
        # but this reads max_num_results — verify which dict the handler
        # passes here; the two keys may need to be reconciled.
        request_body: Dict[str, Any] = {
            "vectorBucketName": bucket_name,
            "indexName": index_name,
            "queryVector": {"float32": query_embedding},
            "topK": vector_store_search_optional_params.get("max_num_results", 5),  # Default to 5
            "returnDistance": True,
            "returnMetadata": True,
        }
        return url, request_body

    def transform_search_vector_store_request(
        self,
        vector_store_id: str,
        query: Union[str, List[str]],
        vector_store_search_optional_params: VectorStoreSearchOptionalRequestParams,
        api_base: str,
        litellm_logging_obj: LiteLLMLoggingObj,
        litellm_params: dict,
    ) -> Tuple[str, Dict]:
        """Sync version - generates embedding synchronously."""
        bucket_name, index_name = self._resolve_bucket_and_index(
            vector_store_id, litellm_params
        )
        query_text = self._normalize_query(query)

        # Generate embedding for the query
        embedding_model = litellm_params.get("embedding_model", "text-embedding-3-small")

        import litellm as litellm_module
        embedding_response = litellm_module.embedding(
            model=embedding_model, input=[query_text]
        )
        query_embedding = embedding_response.data[0]["embedding"]

        # Stash the query text so the response transform can echo it back.
        litellm_logging_obj.model_call_details["query"] = query_text
        return self._build_query_request(
            bucket_name=bucket_name,
            index_name=index_name,
            query_embedding=query_embedding,
            vector_store_search_optional_params=vector_store_search_optional_params,
            api_base=api_base,
        )

    async def atransform_search_vector_store_request(
        self,
        vector_store_id: str,
        query: Union[str, List[str]],
        vector_store_search_optional_params: VectorStoreSearchOptionalRequestParams,
        api_base: str,
        litellm_logging_obj: LiteLLMLoggingObj,
        litellm_params: dict,
    ) -> Tuple[str, Dict]:
        """Async version - generates embedding asynchronously."""
        bucket_name, index_name = self._resolve_bucket_and_index(
            vector_store_id, litellm_params
        )
        query_text = self._normalize_query(query)

        # Generate embedding for the query asynchronously
        embedding_model = litellm_params.get("embedding_model", "text-embedding-3-small")

        import litellm as litellm_module
        embedding_response = await litellm_module.aembedding(
            model=embedding_model, input=[query_text]
        )
        query_embedding = embedding_response.data[0]["embedding"]

        # Stash the query text so the response transform can echo it back.
        litellm_logging_obj.model_call_details["query"] = query_text
        return self._build_query_request(
            bucket_name=bucket_name,
            index_name=index_name,
            query_embedding=query_embedding,
            vector_store_search_optional_params=vector_store_search_optional_params,
            api_base=api_base,
        )

    def sign_request(
        self,
        headers: dict,
        optional_params: Dict,
        request_data: Dict,
        api_base: str,
        api_key: Optional[str] = None,
    ) -> Tuple[dict, Optional[bytes]]:
        """Sign the request with AWS SigV4 for the ``s3vectors`` service."""
        return self._sign_request(
            service_name="s3vectors",
            headers=headers,
            optional_params=optional_params,
            request_data=request_data,
            api_base=api_base,
            api_key=api_key,
        )

    def transform_search_vector_store_response(
        self, response: httpx.Response, litellm_logging_obj: LiteLLMLoggingObj
    ) -> VectorStoreSearchResponse:
        """Convert a QueryVectors response into a VectorStoreSearchResponse.

        Vectors without a ``source_text`` metadata field are skipped, since
        there is no text content to return to the caller.
        """
        try:
            response_data = response.json()
            results: List[VectorStoreSearchResult] = []

            for item in response_data.get("vectors", []) or []:
                metadata = item.get("metadata", {}) or {}
                source_text = metadata.get("source_text", "")

                if not source_text:
                    continue

                # Extract file information from metadata
                chunk_index = metadata.get("chunk_index", "0")
                file_id = f"s3-vectors-chunk-{chunk_index}"
                filename = metadata.get("filename", f"document-{chunk_index}")

                # S3 Vectors returns distance, convert to similarity score (0-1)
                # Lower distance = higher similarity
                distance = item.get("distance")
                score = None
                if distance is not None:
                    # Assumes cosine distance (similarity = 1 - distance),
                    # clamped to [0, 1]. NOTE(review): if the index uses
                    # euclidean distance this mapping is wrong — confirm the
                    # index's distance metric.
                    score = max(0.0, min(1.0, 1.0 - float(distance)))

                results.append(
                    VectorStoreSearchResult(
                        score=score,
                        content=[VectorStoreResultContent(text=source_text, type="text")],
                        file_id=file_id,
                        filename=filename,
                        attributes=metadata,
                    )
                )

            return VectorStoreSearchResponse(
                object="vector_store.search_results.page",
                search_query=litellm_logging_obj.model_call_details.get("query", ""),
                data=results,
            )
        except Exception as e:
            raise self.get_error_class(
                error_message=str(e),
                status_code=response.status_code,
                headers=response.headers,
            )

    # Vector store creation is not yet implemented
    def transform_create_vector_store_request(
        self,
        vector_store_create_optional_params,
        api_base: str,
    ) -> Tuple[str, Dict]:
        raise NotImplementedError

    def transform_create_vector_store_response(self, response: httpx.Response):
        raise NotImplementedError
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
9 changes: 9 additions & 0 deletions litellm/proxy/rag_endpoints/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,14 @@ async def _save_vector_store_to_db_from_rag_ingest(
litellm_vector_store_params = ingest_options.get("litellm_vector_store_params", {})
custom_vector_store_name = litellm_vector_store_params.get("vector_store_name")
custom_vector_store_description = litellm_vector_store_params.get("vector_store_description")

# Extract provider-specific params from vector_store_config to save as litellm_params
# This ensures params like aws_region_name, embedding_model, etc. are available for search
provider_specific_params = {}
excluded_keys = {"custom_llm_provider", "vector_store_id"}
for key, value in vector_store_config.items():
if key not in excluded_keys and value is not None:
provider_specific_params[key] = value

# Build file metadata entry using helper
file_entry = _build_file_metadata_entry(
Expand Down Expand Up @@ -167,6 +175,7 @@ async def _save_vector_store_to_db_from_rag_ingest(
vector_store_name=vector_store_name,
vector_store_description=vector_store_description,
vector_store_metadata=initial_metadata,
litellm_params=provider_specific_params if provider_specific_params else None,
)

verbose_proxy_logger.info(
Expand Down
Loading
Loading