Skip to content

Commit

Permalink
docs: small updates to astra vectorize docs (langflow-ai#2497)
Browse files Browse the repository at this point in the history
small updates to vectorize docs

Co-authored-by: Gabriel Luiz Freitas Almeida <[email protected]>
  • Loading branch information
jordanrfrazier and ogabrielluiz authored Jul 4, 2024
1 parent 05efa58 commit a933139
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 30 deletions.
27 changes: 14 additions & 13 deletions src/backend/base/langflow/components/embeddings/AstraVectorize.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,37 +51,38 @@ class AstraVectorize(Component):
inputs = [
DropdownInput(
name="provider",
display_name="Provider name",
display_name="Provider",
options=VECTORIZE_PROVIDERS_MAPPING.keys(),
value="",
required=True,
),
MessageTextInput(
name="model_name",
display_name="Model name",
display_name="Model Name",
info=f"The embedding model to use for the selected provider. Each provider has a different set of models "
f"available (full list at https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\n\n{VECTORIZE_MODELS_STR}",
f"available (https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\n\n{VECTORIZE_MODELS_STR}",
required=True,
),
SecretStrInput(
name="provider_api_key",
display_name="Provider API Key",
info="An alternative to the Astra Authentication that passes an API key for the provider with each request to Astra DB. This may be used when Vectorize is configured for the collection, but no corresponding provider secret is stored within Astra's key management system.",
),
MessageTextInput(
name="api_key_name",
display_name="API Key name",
display_name="Provider API Key Name",
info="The name of the embeddings provider API key stored on Astra. If set, it will override the 'ProviderKey' in the authentication parameters.",
advanced=True,
),
DictInput(
name="authentication",
display_name="Authentication parameters",
display_name="Authentication Parameters",
is_list=True,
advanced=True,
),
SecretStrInput(
name="provider_api_key",
display_name="Provider API Key",
info="An alternative to the Astra Authentication that let you use directly the API key of the provider.",
advanced=True,
),
DictInput(
name="model_parameters",
display_name="Model parameters",
display_name="Model Parameters",
advanced=True,
is_list=True,
),
Expand All @@ -97,7 +98,7 @@ def build_options(self) -> dict[str, Any]:
if api_key_name:
authentication["providerKey"] = api_key_name
return {
# must match exactly astra CollectionVectorServiceOptions
# must match astrapy.info.CollectionVectorServiceOptions
"collection_vector_service_options": {
"provider": provider_value,
"modelName": self.model_name,
Expand Down
36 changes: 19 additions & 17 deletions src/backend/base/langflow/components/vectorstores/AstraDB.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ class AstraVectorStoreComponent(LCVectorStoreComponent):
name="embedding",
display_name="Embedding or Astra Vectorize",
input_types=["Embeddings", "dict"],
info="Allows either an embedding model or an Astra Vectorize configuration. If Astra Vectorize is already configured for the collection, this field is not required.",
),
StrInput(
name="metadata_indexing_exclude",
Expand Down Expand Up @@ -164,6 +165,7 @@ class AstraVectorStoreComponent(LCVectorStoreComponent):
def _build_vector_store_no_ingest(self):
if self._cached_vectorstore:
return self._cached_vectorstore

try:
from langchain_astradb import AstraDBVectorStore
from langchain_astradb.utils.astradb import SetupMode
Expand Down Expand Up @@ -226,11 +228,6 @@ def _build_vector_store_no_ingest(self):

return vector_store

def build_vector_store(self):
    """Build the vector store, add the documents to it, and return it.

    The store comes from ``_build_vector_store_no_ingest``; it is then handed
    to ``_add_documents_to_vector_store`` before being returned.
    """
    store = self._build_vector_store_no_ingest()
    # Ingestion is a separate step from building the store itself.
    self._add_documents_to_vector_store(store)
    return store

def _add_documents_to_vector_store(self, vector_store):
documents = []
for _input in self.ingest_data or []:
Expand All @@ -256,6 +253,18 @@ def _map_search_type(self):
else:
return "similarity"

def _build_search_args(self):
    """Assemble the keyword arguments used for a vector store search.

    Returns:
        dict: always contains ``"k"`` (from ``self.number_of_results``) and
        ``"score_threshold"`` (from ``self.search_score_threshold``). A
        ``"filter"`` entry is added only when ``self.search_filter`` holds at
        least one pair whose key and value are both truthy.
    """
    args = {
        "k": self.number_of_results,
        "score_threshold": self.search_score_threshold,
    }

    if self.search_filter:
        # Drop pairs with an empty key or an empty value.
        # NOTE(review): this also drops falsy values such as 0 or False --
        # confirm that is intended for filters.
        clean_filter = {k: v for k, v in self.search_filter.items() if k and v}
        if clean_filter:
            args["filter"] = clean_filter
    return args

def search_documents(self) -> list[Data]:
vector_store = self._build_vector_store_no_ingest()
self._add_documents_to_vector_store(vector_store)
Expand Down Expand Up @@ -283,21 +292,14 @@ def search_documents(self) -> list[Data]:
logger.debug("No search input provided. Skipping search.")
return []

def _build_search_args(self):
    """Assemble the keyword arguments used for a vector store search.

    Returns:
        dict: always contains ``"k"`` (from ``self.number_of_results``) and
        ``"score_threshold"`` (from ``self.search_score_threshold``). A
        ``"filter"`` entry is added only when ``self.search_filter`` holds at
        least one pair whose key and value are both truthy.
    """
    args = {
        "k": self.number_of_results,
        "score_threshold": self.search_score_threshold,
    }

    if self.search_filter:
        # Drop pairs with an empty key or an empty value.
        # NOTE(review): this also drops falsy values such as 0 or False --
        # confirm that is intended for filters.
        clean_filter = {k: v for k, v in self.search_filter.items() if k and v}
        if clean_filter:
            args["filter"] = clean_filter
    return args

def get_retriever_kwargs(self):
    """Return the retriever configuration as a dict.

    The result has two keys: ``"search_type"`` (from ``_map_search_type``)
    and ``"search_kwargs"`` (from ``_build_search_args``).
    """
    # Search arguments are computed first, then the search type.
    search_kwargs = self._build_search_args()
    search_type = self._map_search_type()
    return dict(search_type=search_type, search_kwargs=search_kwargs)

def build_vector_store(self):
    """Build the vector store, add the documents to it, and return it.

    The store comes from ``_build_vector_store_no_ingest``; it is then handed
    to ``_add_documents_to_vector_store`` before being returned.
    """
    store = self._build_vector_store_no_ingest()
    # Ingestion is a separate step from building the store itself.
    self._add_documents_to_vector_store(store)
    return store

0 comments on commit a933139

Please sign in to comment.