-
Notifications
You must be signed in to change notification settings - Fork 17.3k
Community: Adding bulk_size as a settable param for OpenSearchVectorSearch #28325
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
51843d4
9fbccec
11383f4
6c3bd4a
0ee5af5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -402,6 +402,7 @@ def __init__( | |
self.client = _get_opensearch_client(opensearch_url, **kwargs) | ||
self.async_client = _get_async_opensearch_client(opensearch_url, **kwargs) | ||
self.engine = kwargs.get("engine", "nmslib") | ||
self.bulk_size = kwargs.get("bulk_size", 500) | ||
|
||
@property | ||
def embeddings(self) -> Embeddings: | ||
|
@@ -413,10 +414,9 @@ def __add( | |
embeddings: List[List[float]], | ||
metadatas: Optional[List[dict]] = None, | ||
ids: Optional[List[str]] = None, | ||
bulk_size: int = 500, | ||
**kwargs: Any, | ||
) -> List[str]: | ||
_validate_embeddings_and_bulk_size(len(embeddings), bulk_size) | ||
_validate_embeddings_and_bulk_size(len(embeddings), self.bulk_size) | ||
index_name = kwargs.get("index_name", self.index_name) | ||
text_field = kwargs.get("text_field", "text") | ||
dim = len(embeddings[0]) | ||
|
@@ -454,10 +454,9 @@ async def __aadd( | |
embeddings: List[List[float]], | ||
metadatas: Optional[List[dict]] = None, | ||
ids: Optional[List[str]] = None, | ||
bulk_size: int = 500, | ||
**kwargs: Any, | ||
) -> List[str]: | ||
_validate_embeddings_and_bulk_size(len(embeddings), bulk_size) | ||
_validate_embeddings_and_bulk_size(len(embeddings), self.bulk_size) | ||
index_name = kwargs.get("index_name", self.index_name) | ||
text_field = kwargs.get("text_field", "text") | ||
dim = len(embeddings[0]) | ||
|
@@ -560,7 +559,6 @@ def add_texts( | |
texts: Iterable[str], | ||
metadatas: Optional[List[dict]] = None, | ||
ids: Optional[List[str]] = None, | ||
bulk_size: int = 500, | ||
**kwargs: Any, | ||
) -> List[str]: | ||
"""Run more texts through the embeddings and add to the vectorstore. | ||
|
@@ -587,7 +585,7 @@ def add_texts( | |
embeddings, | ||
metadatas=metadatas, | ||
ids=ids, | ||
bulk_size=bulk_size, | ||
bulk_size=self.bulk_size, | ||
**kwargs, | ||
) | ||
|
||
|
@@ -596,7 +594,6 @@ async def aadd_texts( | |
texts: Iterable[str], | ||
metadatas: Optional[List[dict]] = None, | ||
ids: Optional[List[str]] = None, | ||
bulk_size: int = 500, | ||
**kwargs: Any, | ||
) -> List[str]: | ||
""" | ||
|
@@ -609,7 +606,7 @@ async def aadd_texts( | |
embeddings, | ||
metadatas=metadatas, | ||
ids=ids, | ||
bulk_size=bulk_size, | ||
bulk_size=self.bulk_size, | ||
**kwargs, | ||
) | ||
|
||
|
@@ -618,7 +615,6 @@ def add_embeddings( | |
text_embeddings: Iterable[Tuple[str, List[float]]], | ||
metadatas: Optional[List[dict]] = None, | ||
ids: Optional[List[str]] = None, | ||
bulk_size: int = 500, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Breaking change - can we keep this parameter, and use the passed-in value as an override? It can still default to the instance-level `self.bulk_size`. |
||
**kwargs: Any, | ||
) -> List[str]: | ||
"""Add the given texts and embeddings to the vectorstore. | ||
|
@@ -646,7 +642,7 @@ def add_embeddings( | |
list(embeddings), | ||
metadatas=metadatas, | ||
ids=ids, | ||
bulk_size=bulk_size, | ||
bulk_size=self.bulk_size, | ||
**kwargs, | ||
) | ||
|
||
|
@@ -1085,7 +1081,6 @@ def from_texts( | |
texts: List[str], | ||
embedding: Embeddings, | ||
metadatas: Optional[List[dict]] = None, | ||
bulk_size: int = 500, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Breaking change: this removes the `bulk_size` parameter from the public signature. |
||
ids: Optional[List[str]] = None, | ||
**kwargs: Any, | ||
) -> OpenSearchVectorSearch: | ||
|
@@ -1139,7 +1134,7 @@ def from_texts( | |
texts, | ||
embedding, | ||
metadatas=metadatas, | ||
bulk_size=bulk_size, | ||
bulk_size=cls.bulk_size, | ||
ids=ids, | ||
**kwargs, | ||
) | ||
|
@@ -1150,7 +1145,6 @@ async def afrom_texts( | |
texts: List[str], | ||
embedding: Embeddings, | ||
metadatas: Optional[List[dict]] = None, | ||
bulk_size: int = 500, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Breaking change: this removes the `bulk_size` parameter from the public signature. |
||
ids: Optional[List[str]] = None, | ||
**kwargs: Any, | ||
) -> OpenSearchVectorSearch: | ||
|
@@ -1204,7 +1198,7 @@ async def afrom_texts( | |
texts, | ||
embedding, | ||
metadatas=metadatas, | ||
bulk_size=bulk_size, | ||
bulk_size=cls.bulk_size, | ||
ids=ids, | ||
**kwargs, | ||
) | ||
|
@@ -1216,7 +1210,6 @@ def from_embeddings( | |
texts: List[str], | ||
embedding: Embeddings, | ||
metadatas: Optional[List[dict]] = None, | ||
bulk_size: int = 500, | ||
ids: Optional[List[str]] = None, | ||
**kwargs: Any, | ||
) -> OpenSearchVectorSearch: | ||
|
@@ -1285,7 +1278,7 @@ def from_embeddings( | |
"max_chunk_bytes", | ||
"is_aoss", | ||
] | ||
_validate_embeddings_and_bulk_size(len(embeddings), bulk_size) | ||
_validate_embeddings_and_bulk_size(len(embeddings), cls.bulk_size) | ||
dim = len(embeddings[0]) | ||
# Get the index name from either from kwargs or ENV Variable | ||
# before falling back to random generation | ||
|
@@ -1346,7 +1339,6 @@ async def afrom_embeddings( | |
texts: List[str], | ||
embedding: Embeddings, | ||
metadatas: Optional[List[dict]] = None, | ||
bulk_size: int = 500, | ||
ids: Optional[List[str]] = None, | ||
**kwargs: Any, | ||
) -> OpenSearchVectorSearch: | ||
|
@@ -1417,7 +1409,7 @@ async def afrom_embeddings( | |
"max_chunk_bytes", | ||
"is_aoss", | ||
] | ||
_validate_embeddings_and_bulk_size(len(embeddings), bulk_size) | ||
_validate_embeddings_and_bulk_size(len(embeddings), cls.bulk_size) | ||
dim = len(embeddings[0]) | ||
# Get the index name from either from kwargs or ENV Variable | ||
# before falling back to random generation | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Breaking change.