diff --git a/Makefile b/Makefile index 555d816bc4fe..4d66593a4f65 100644 --- a/Makefile +++ b/Makefile @@ -64,13 +64,11 @@ help: ## show this help message reinstall_backend: ## forces reinstall all dependencies (no caching) @echo 'Installing backend dependencies' - #@poetry install > /dev/null 2>&1 - @cd src/backend/base && uv sync -n --reinstall && cd ../../../ && uv sync -n --reinstall > /dev/null 2>&1 + @uv sync -n --reinstall --frozen install_backend: ## install the backend dependencies @echo 'Installing backend dependencies' - #@poetry install > /dev/null 2>&1 - @cd src/backend/base && uv sync && cd ../../../ && uv sync > /dev/null 2>&1 + @uv sync --frozen install_frontend: ## install the frontend dependencies @echo 'Installing frontend dependencies' @@ -141,7 +139,7 @@ coverage: ## run the tests and generate a coverage report #@poetry run coverage erase unit_tests: ## run unit tests - cd src/backend/base && uv sync --extra dev && cd ../../../ && uv sync --extra dev > /dev/null 2>&1 + @uv sync --extra dev --frozen ifeq ($(async), true) uv run pytest src/backend/tests \ --ignore=src/backend/tests/integration \ diff --git a/pyproject.toml b/pyproject.toml index 22e1cf6b07de..d071b25c311e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,7 +49,7 @@ dependencies = [ "pyarrow>=14.0.0", "wikipedia>=1.4.0", "qdrant-client>=1.9.0", - "weaviate-client", + "weaviate-client>=4.8", "cohere>=5.5.3", "faiss-cpu>=1.8.0", "types-cachetools>=5.3.0.5", @@ -132,7 +132,7 @@ local = [ "ctransformers>=0.2.10" ] clickhouse-connect = [ - "clickhouse-connect[clickhouse-connect]==0.7.19" + "clickhouse-connect==0.7.19" ] [project.scripts] @@ -276,7 +276,7 @@ dev-dependencies = [ "ipykernel>=6.29.0", "mypy>=1.11.0", "ruff>=0.6.2,<0.7.0", - "httpx", + "httpx>=0.27.0", "pytest>=8.2.0", "types-requests>=2.32.0", "requests>=2.32.0", diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Agent Flow.json b/src/backend/base/langflow/initial_setup/starter_projects/Agent Flow.json index 753ec13e72db..947638cbab4a 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Agent Flow.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Agent Flow.json @@ -1211,7 +1211,7 @@ "show": true, "title_case": false, "type": "code", - "value": "import ast\nimport operator\nfrom typing import List\n\nfrom langchain.tools import StructuredTool\nfrom pydantic import BaseModel, Field\n\nfrom langflow.base.langchain_utilities.model import LCToolComponent\nfrom langflow.field_typing import Tool\nfrom langflow.inputs import MessageTextInput\nfrom langflow.schema import Data\n\n\nclass CalculatorToolComponent(LCToolComponent):\n display_name = \"Calculator\"\n description = \"Perform basic arithmetic operations on a given expression.\"\n icon = \"calculator\"\n name = \"CalculatorTool\"\n\n inputs = [\n MessageTextInput(\n name=\"expression\",\n display_name=\"Expression\",\n info=\"The arithmetic expression to evaluate (e.g., '4*4*(33/22)+12-20').\",\n ),\n ]\n\n class CalculatorToolSchema(BaseModel):\n expression: str = Field(..., description=\"The arithmetic expression to evaluate.\")\n\n def run_model(self) -> List[Data]:\n return self._evaluate_expression(self.expression)\n\n def build_tool(self) -> Tool:\n return StructuredTool.from_function(\n name=\"calculator\",\n description=\"Evaluate basic arithmetic expressions. Input should be a string containing the expression.\",\n func=self._evaluate_expression,\n args_schema=self.CalculatorToolSchema,\n )\n\n def _evaluate_expression(self, expression: str) -> List[Data]:\n try:\n # Define the allowed operators\n operators = {\n ast.Add: operator.add,\n ast.Sub: operator.sub,\n ast.Mult: operator.mul,\n ast.Div: operator.truediv,\n ast.Pow: operator.pow,\n }\n\n def eval_expr(node):\n if isinstance(node, ast.Num):\n return node.n\n elif isinstance(node, ast.BinOp):\n return operators[type(node.op)](eval_expr(node.left), eval_expr(node.right))\n elif isinstance(node, ast.UnaryOp):\n return operators[type(node.op)](eval_expr(node.operand))\n else:\n raise TypeError(node)\n\n # Parse the expression and evaluate it\n tree = ast.parse(expression, mode=\"eval\")\n result = eval_expr(tree.body)\n\n # Format the result to a reasonable number of decimal places\n formatted_result = f\"{result:.6f}\".rstrip(\"0\").rstrip(\".\")\n\n self.status = formatted_result\n return [Data(data={\"result\": formatted_result})]\n\n except (SyntaxError, TypeError, KeyError) as e:\n error_message = f\"Invalid expression: {str(e)}\"\n self.status = error_message\n return [Data(data={\"error\": error_message})]\n except ZeroDivisionError:\n error_message = \"Error: Division by zero\"\n self.status = error_message\n return [Data(data={\"error\": error_message})]\n except Exception as e:\n error_message = f\"Error: {str(e)}\"\n self.status = error_message\n return [Data(data={\"error\": error_message})]\n" + "value": "import ast\nimport operator\n\nfrom langchain.tools import StructuredTool\nfrom pydantic import BaseModel, Field\n\nfrom langflow.base.langchain_utilities.model import LCToolComponent\nfrom langflow.field_typing import Tool\nfrom langflow.inputs import MessageTextInput\nfrom langflow.schema import Data\n\n\nclass CalculatorToolComponent(LCToolComponent):\n display_name = \"Calculator\"\n description = \"Perform basic arithmetic operations on a given expression.\"\n icon = \"calculator\"\n name = \"CalculatorTool\"\n\n inputs = [\n MessageTextInput(\n name=\"expression\",\n display_name=\"Expression\",\n info=\"The arithmetic expression to evaluate (e.g., '4*4*(33/22)+12-20').\",\n ),\n ]\n\n class CalculatorToolSchema(BaseModel):\n expression: str = Field(..., description=\"The arithmetic expression to evaluate.\")\n\n def run_model(self) -> list[Data]:\n return self._evaluate_expression(self.expression)\n\n def build_tool(self) -> Tool:\n return StructuredTool.from_function(\n name=\"calculator\",\n description=\"Evaluate basic arithmetic expressions. Input should be a string containing the expression.\",\n func=self._evaluate_expression,\n args_schema=self.CalculatorToolSchema,\n )\n\n def _evaluate_expression(self, expression: str) -> list[Data]:\n try:\n # Define the allowed operators\n operators = {\n ast.Add: operator.add,\n ast.Sub: operator.sub,\n ast.Mult: operator.mul,\n ast.Div: operator.truediv,\n ast.Pow: operator.pow,\n }\n\n def eval_expr(node):\n if isinstance(node, ast.Num):\n return node.n\n elif isinstance(node, ast.BinOp):\n return operators[type(node.op)](eval_expr(node.left), eval_expr(node.right))\n elif isinstance(node, ast.UnaryOp):\n return operators[type(node.op)](eval_expr(node.operand))\n else:\n raise TypeError(node)\n\n # Parse the expression and evaluate it\n tree = ast.parse(expression, mode=\"eval\")\n result = eval_expr(tree.body)\n\n # Format the result to a reasonable number of decimal places\n formatted_result = f\"{result:.6f}\".rstrip(\"0\").rstrip(\".\")\n\n self.status = formatted_result\n return [Data(data={\"result\": formatted_result})]\n\n except (SyntaxError, TypeError, KeyError) as e:\n error_message = f\"Invalid expression: {str(e)}\"\n self.status = error_message\n return [Data(data={\"error\": error_message})]\n except ZeroDivisionError:\n error_message = \"Error: Division by zero\"\n self.status = error_message\n return [Data(data={\"error\": error_message})]\n except Exception as e:\n error_message = f\"Error: {str(e)}\"\n self.status = error_message\n return [Data(data={\"error\": error_message})]\n" }, "expression": { "_input_type": "MessageTextInput", @@ -1321,7 +1321,7 @@ "show": true, "title_case": false, "type": "code", - "value": "import importlib\nfrom typing import List, Union\n\nfrom langchain.tools import StructuredTool\nfrom langchain_experimental.utilities import PythonREPL\nfrom pydantic import BaseModel, Field\n\nfrom langflow.base.langchain_utilities.model import LCToolComponent\nfrom langflow.field_typing import Tool\nfrom langflow.inputs import StrInput\nfrom langflow.schema import Data\n\n\nclass PythonREPLToolComponent(LCToolComponent):\n display_name = \"Python REPL Tool\"\n description = \"A tool for running Python code in a REPL environment.\"\n name = \"PythonREPLTool\"\n\n inputs = [\n StrInput(\n name=\"name\",\n display_name=\"Tool Name\",\n info=\"The name of the tool.\",\n value=\"python_repl\",\n ),\n StrInput(\n name=\"description\",\n display_name=\"Tool Description\",\n info=\"A description of the tool.\",\n value=\"A Python shell. Use this to execute python commands. Input should be a valid python command. If you want to see the output of a value, you should print it out with `print(...)`.\",\n ),\n StrInput(\n name=\"global_imports\",\n display_name=\"Global Imports\",\n info=\"A comma-separated list of modules to import globally, e.g. 'math,numpy'.\",\n value=\"math\",\n ),\n StrInput(\n name=\"code\",\n display_name=\"Python Code\",\n info=\"The Python code to execute.\",\n value=\"print('Hello, World!')\",\n ),\n ]\n\n class PythonREPLSchema(BaseModel):\n code: str = Field(..., description=\"The Python code to execute.\")\n\n def get_globals(self, global_imports: Union[str, List[str]]) -> dict:\n global_dict = {}\n if isinstance(global_imports, str):\n modules = [module.strip() for module in global_imports.split(\",\")]\n elif isinstance(global_imports, list):\n modules = global_imports\n else:\n raise ValueError(\"global_imports must be either a string or a list\")\n\n for module in modules:\n try:\n imported_module = importlib.import_module(module)\n global_dict[imported_module.__name__] = imported_module\n except ImportError:\n raise ImportError(f\"Could not import module {module}\")\n return global_dict\n\n def build_tool(self) -> Tool:\n _globals = self.get_globals(self.global_imports)\n python_repl = PythonREPL(_globals=_globals)\n\n def run_python_code(code: str) -> str:\n try:\n return python_repl.run(code)\n except Exception as e:\n return f\"Error: {str(e)}\"\n\n tool = StructuredTool.from_function(\n name=self.name,\n description=self.description,\n func=run_python_code,\n args_schema=self.PythonREPLSchema,\n )\n\n self.status = f\"Python REPL Tool created with global imports: {self.global_imports}\"\n return tool\n\n def run_model(self) -> List[Data]:\n tool = self.build_tool()\n result = tool.run(self.code)\n return [Data(data={\"result\": result})]\n" + "value": "import importlib\n\nfrom langchain.tools import StructuredTool\nfrom langchain_experimental.utilities import PythonREPL\nfrom pydantic import BaseModel, Field\n\nfrom langflow.base.langchain_utilities.model import LCToolComponent\nfrom langflow.field_typing import Tool\nfrom langflow.inputs import StrInput\nfrom langflow.schema import Data\n\n\nclass PythonREPLToolComponent(LCToolComponent):\n display_name = \"Python REPL Tool\"\n description = \"A tool for running Python code in a REPL environment.\"\n name = \"PythonREPLTool\"\n\n inputs = [\n StrInput(\n name=\"name\",\n display_name=\"Tool Name\",\n info=\"The name of the tool.\",\n value=\"python_repl\",\n ),\n StrInput(\n name=\"description\",\n display_name=\"Tool Description\",\n info=\"A description of the tool.\",\n value=\"A Python shell. Use this to execute python commands. Input should be a valid python command. If you want to see the output of a value, you should print it out with `print(...)`.\",\n ),\n StrInput(\n name=\"global_imports\",\n display_name=\"Global Imports\",\n info=\"A comma-separated list of modules to import globally, e.g. 'math,numpy'.\",\n value=\"math\",\n ),\n StrInput(\n name=\"code\",\n display_name=\"Python Code\",\n info=\"The Python code to execute.\",\n value=\"print('Hello, World!')\",\n ),\n ]\n\n class PythonREPLSchema(BaseModel):\n code: str = Field(..., description=\"The Python code to execute.\")\n\n def get_globals(self, global_imports: str | list[str]) -> dict:\n global_dict = {}\n if isinstance(global_imports, str):\n modules = [module.strip() for module in global_imports.split(\",\")]\n elif isinstance(global_imports, list):\n modules = global_imports\n else:\n raise ValueError(\"global_imports must be either a string or a list\")\n\n for module in modules:\n try:\n imported_module = importlib.import_module(module)\n global_dict[imported_module.__name__] = imported_module\n except ImportError:\n raise ImportError(f\"Could not import module {module}\")\n return global_dict\n\n def build_tool(self) -> Tool:\n _globals = self.get_globals(self.global_imports)\n python_repl = PythonREPL(_globals=_globals)\n\n def run_python_code(code: str) -> str:\n try:\n return python_repl.run(code)\n except Exception as e:\n return f\"Error: {str(e)}\"\n\n tool = StructuredTool.from_function(\n name=self.name,\n description=self.description,\n func=run_python_code,\n args_schema=self.PythonREPLSchema,\n )\n\n self.status = f\"Python REPL Tool created with global imports: {self.global_imports}\"\n return tool\n\n def run_model(self) -> list[Data]:\n tool = self.build_tool()\n result = tool.run(self.code)\n return [Data(data={\"result\": result})]\n" }, "description": { "_input_type": "StrInput", diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Complex Agent.json b/src/backend/base/langflow/initial_setup/starter_projects/Complex Agent.json index aa0012e1b076..5030fda0c5bc 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Complex Agent.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Complex Agent.json @@ -4109,7 +4109,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from typing import Any, Dict, List, Optional\n\nfrom langchain.tools import StructuredTool\nfrom langchain_community.utilities.searchapi import SearchApiAPIWrapper\nfrom pydantic import BaseModel, Field\n\nfrom langflow.base.langchain_utilities.model import LCToolComponent\nfrom langflow.field_typing import Tool\nfrom langflow.inputs import DictInput, IntInput, MessageTextInput, MultilineInput, SecretStrInput\nfrom langflow.schema import Data\n\n\nclass SearchAPIComponent(LCToolComponent):\n display_name: str = \"Search API\"\n description: str = \"Call the searchapi.io API with result limiting\"\n name = \"SearchAPI\"\n documentation: str = \"https://www.searchapi.io/docs/google\"\n\n inputs = [\n MessageTextInput(name=\"engine\", display_name=\"Engine\", value=\"google\"),\n SecretStrInput(name=\"api_key\", display_name=\"SearchAPI API Key\", required=True),\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input\",\n ),\n DictInput(name=\"search_params\", display_name=\"Search parameters\", advanced=True, is_list=True),\n IntInput(name=\"max_results\", display_name=\"Max Results\", value=5, advanced=True),\n IntInput(name=\"max_snippet_length\", display_name=\"Max Snippet Length\", value=100, advanced=True),\n ]\n\n class SearchAPISchema(BaseModel):\n query: str = Field(..., description=\"The search query\")\n params: Optional[Dict[str, Any]] = Field(default_factory=dict, description=\"Additional search parameters\")\n max_results: int = Field(5, description=\"Maximum number of results to return\")\n max_snippet_length: int = Field(100, description=\"Maximum length of each result snippet\")\n\n def _build_wrapper(self):\n return SearchApiAPIWrapper(engine=self.engine, searchapi_api_key=self.api_key)\n\n def build_tool(self) -> Tool:\n wrapper = self._build_wrapper()\n\n def search_func(\n query: str, params: Optional[Dict[str, Any]] = None, max_results: int = 5, max_snippet_length: int = 100\n ) -> List[Dict[str, Any]]:\n params = params or {}\n full_results = wrapper.results(query=query, **params)\n organic_results = full_results.get(\"organic_results\", [])[:max_results]\n\n limited_results = []\n for result in organic_results:\n limited_result = {\n \"title\": result.get(\"title\", \"\")[:max_snippet_length],\n \"link\": result.get(\"link\", \"\"),\n \"snippet\": result.get(\"snippet\", \"\")[:max_snippet_length],\n }\n limited_results.append(limited_result)\n\n return limited_results\n\n tool = StructuredTool.from_function(\n name=\"search_api\",\n description=\"Search for recent results using searchapi.io with result limiting\",\n func=search_func,\n args_schema=self.SearchAPISchema,\n )\n\n self.status = f\"Search API Tool created with engine: {self.engine}\"\n return tool\n\n def run_model(self) -> List[Data]:\n tool = self.build_tool()\n results = tool.run(\n {\n \"query\": self.input_value,\n \"params\": self.search_params or {},\n \"max_results\": self.max_results,\n \"max_snippet_length\": self.max_snippet_length,\n }\n )\n\n data_list = [Data(data=result, text=result.get(\"snippet\", \"\")) for result in results]\n\n self.status = data_list\n return data_list\n" + "value": "from typing import Any\n\nfrom langchain.tools import StructuredTool\nfrom langchain_community.utilities.searchapi import SearchApiAPIWrapper\nfrom pydantic import BaseModel, Field\n\nfrom langflow.base.langchain_utilities.model import LCToolComponent\nfrom langflow.field_typing import Tool\nfrom langflow.inputs import DictInput, IntInput, MessageTextInput, MultilineInput, SecretStrInput\nfrom langflow.schema import Data\n\n\nclass SearchAPIComponent(LCToolComponent):\n display_name: str = \"Search API\"\n description: str = \"Call the searchapi.io API with result limiting\"\n name = \"SearchAPI\"\n documentation: str = \"https://www.searchapi.io/docs/google\"\n\n inputs = [\n MessageTextInput(name=\"engine\", display_name=\"Engine\", value=\"google\"),\n SecretStrInput(name=\"api_key\", display_name=\"SearchAPI API Key\", required=True),\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input\",\n ),\n DictInput(name=\"search_params\", display_name=\"Search parameters\", advanced=True, is_list=True),\n IntInput(name=\"max_results\", display_name=\"Max Results\", value=5, advanced=True),\n IntInput(name=\"max_snippet_length\", display_name=\"Max Snippet Length\", value=100, advanced=True),\n ]\n\n class SearchAPISchema(BaseModel):\n query: str = Field(..., description=\"The search query\")\n params: dict[str, Any] | None = Field(default_factory=dict, description=\"Additional search parameters\")\n max_results: int = Field(5, description=\"Maximum number of results to return\")\n max_snippet_length: int = Field(100, description=\"Maximum length of each result snippet\")\n\n def _build_wrapper(self):\n return SearchApiAPIWrapper(engine=self.engine, searchapi_api_key=self.api_key)\n\n def build_tool(self) -> Tool:\n wrapper = self._build_wrapper()\n\n def search_func(\n query: str, params: dict[str, Any] | None = None, max_results: int = 5, max_snippet_length: int = 100\n ) -> list[dict[str, Any]]:\n params = params or {}\n full_results = wrapper.results(query=query, **params)\n organic_results = full_results.get(\"organic_results\", [])[:max_results]\n\n limited_results = []\n for result in organic_results:\n limited_result = {\n \"title\": result.get(\"title\", \"\")[:max_snippet_length],\n \"link\": result.get(\"link\", \"\"),\n \"snippet\": result.get(\"snippet\", \"\")[:max_snippet_length],\n }\n limited_results.append(limited_result)\n\n return limited_results\n\n tool = StructuredTool.from_function(\n name=\"search_api\",\n description=\"Search for recent results using searchapi.io with result limiting\",\n func=search_func,\n args_schema=self.SearchAPISchema,\n )\n\n self.status = f\"Search API Tool created with engine: {self.engine}\"\n return tool\n\n def run_model(self) -> list[Data]:\n tool = self.build_tool()\n results = tool.run(\n {\n \"query\": self.input_value,\n \"params\": self.search_params or {},\n \"max_results\": self.max_results,\n \"max_snippet_length\": self.max_snippet_length,\n }\n )\n\n data_list = [Data(data=result, text=result.get(\"snippet\", \"\")) for result in results]\n\n self.status = data_list\n return data_list\n" }, "engine": { "advanced": false, diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Hierarchical Agent.json b/src/backend/base/langflow/initial_setup/starter_projects/Hierarchical Agent.json index 10b55ffeda16..41b0c3465e88 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Hierarchical Agent.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Hierarchical Agent.json @@ -2668,7 +2668,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from typing import Any, Dict, List, Optional\n\nfrom langchain.tools import StructuredTool\nfrom langchain_community.utilities.searchapi import SearchApiAPIWrapper\nfrom pydantic import BaseModel, Field\n\nfrom langflow.base.langchain_utilities.model import LCToolComponent\nfrom langflow.field_typing import Tool\nfrom langflow.inputs import DictInput, IntInput, MessageTextInput, MultilineInput, SecretStrInput\nfrom langflow.schema import Data\n\n\nclass SearchAPIComponent(LCToolComponent):\n display_name: str = \"Search API\"\n description: str = \"Call the searchapi.io API with result limiting\"\n name = \"SearchAPI\"\n documentation: str = \"https://www.searchapi.io/docs/google\"\n\n inputs = [\n MessageTextInput(name=\"engine\", display_name=\"Engine\", value=\"google\"),\n SecretStrInput(name=\"api_key\", display_name=\"SearchAPI API Key\", required=True),\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input\",\n ),\n DictInput(name=\"search_params\", display_name=\"Search parameters\", advanced=True, is_list=True),\n IntInput(name=\"max_results\", display_name=\"Max Results\", value=5, advanced=True),\n IntInput(name=\"max_snippet_length\", display_name=\"Max Snippet Length\", value=100, advanced=True),\n ]\n\n class SearchAPISchema(BaseModel):\n query: str = Field(..., description=\"The search query\")\n params: Optional[Dict[str, Any]] = Field(default_factory=dict, description=\"Additional search parameters\")\n max_results: int = Field(5, description=\"Maximum number of results to return\")\n max_snippet_length: int = Field(100, description=\"Maximum length of each result snippet\")\n\n def _build_wrapper(self):\n return SearchApiAPIWrapper(engine=self.engine, searchapi_api_key=self.api_key)\n\n def build_tool(self) -> Tool:\n wrapper = self._build_wrapper()\n\n def search_func(\n query: str, params: Optional[Dict[str, Any]] = None, max_results: int = 5, max_snippet_length: int = 100\n ) -> List[Dict[str, Any]]:\n params = params or {}\n full_results = wrapper.results(query=query, **params)\n organic_results = full_results.get(\"organic_results\", [])[:max_results]\n\n limited_results = []\n for result in organic_results:\n limited_result = {\n \"title\": result.get(\"title\", \"\")[:max_snippet_length],\n \"link\": result.get(\"link\", \"\"),\n \"snippet\": result.get(\"snippet\", \"\")[:max_snippet_length],\n }\n limited_results.append(limited_result)\n\n return limited_results\n\n tool = StructuredTool.from_function(\n name=\"search_api\",\n description=\"Search for recent results using searchapi.io with result limiting\",\n func=search_func,\n args_schema=self.SearchAPISchema,\n )\n\n self.status = f\"Search API Tool created with engine: {self.engine}\"\n return tool\n\n def run_model(self) -> List[Data]:\n tool = self.build_tool()\n results = tool.run(\n {\n \"query\": self.input_value,\n \"params\": self.search_params or {},\n \"max_results\": self.max_results,\n \"max_snippet_length\": self.max_snippet_length,\n }\n )\n\n data_list = [Data(data=result, text=result.get(\"snippet\", \"\")) for result in results]\n\n self.status = data_list\n return data_list\n" + "value": "from typing import Any\n\nfrom langchain.tools import StructuredTool\nfrom langchain_community.utilities.searchapi import SearchApiAPIWrapper\nfrom pydantic import BaseModel, Field\n\nfrom langflow.base.langchain_utilities.model import LCToolComponent\nfrom langflow.field_typing import Tool\nfrom langflow.inputs import DictInput, IntInput, MessageTextInput, MultilineInput, SecretStrInput\nfrom langflow.schema import Data\n\n\nclass SearchAPIComponent(LCToolComponent):\n display_name: str = \"Search API\"\n description: str = \"Call the searchapi.io API with result limiting\"\n name = \"SearchAPI\"\n documentation: str = \"https://www.searchapi.io/docs/google\"\n\n inputs = [\n MessageTextInput(name=\"engine\", display_name=\"Engine\", value=\"google\"),\n SecretStrInput(name=\"api_key\", display_name=\"SearchAPI API Key\", required=True),\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input\",\n ),\n DictInput(name=\"search_params\", display_name=\"Search parameters\", advanced=True, is_list=True),\n IntInput(name=\"max_results\", display_name=\"Max Results\", value=5, advanced=True),\n IntInput(name=\"max_snippet_length\", display_name=\"Max Snippet Length\", value=100, advanced=True),\n ]\n\n class SearchAPISchema(BaseModel):\n query: str = Field(..., description=\"The search query\")\n params: dict[str, Any] | None = Field(default_factory=dict, description=\"Additional search parameters\")\n max_results: int = Field(5, description=\"Maximum number of results to return\")\n max_snippet_length: int = Field(100, description=\"Maximum length of each result snippet\")\n\n def _build_wrapper(self):\n return SearchApiAPIWrapper(engine=self.engine, searchapi_api_key=self.api_key)\n\n def build_tool(self) -> Tool:\n wrapper = self._build_wrapper()\n\n def search_func(\n query: str, params: dict[str, Any] | None = None, max_results: int = 5, max_snippet_length: int = 100\n ) -> list[dict[str, Any]]:\n params = params or {}\n full_results = wrapper.results(query=query, **params)\n organic_results = full_results.get(\"organic_results\", [])[:max_results]\n\n limited_results = []\n for result in organic_results:\n limited_result = {\n \"title\": result.get(\"title\", \"\")[:max_snippet_length],\n \"link\": result.get(\"link\", \"\"),\n \"snippet\": result.get(\"snippet\", \"\")[:max_snippet_length],\n }\n limited_results.append(limited_result)\n\n return limited_results\n\n tool = StructuredTool.from_function(\n name=\"search_api\",\n description=\"Search for recent results using searchapi.io with result limiting\",\n func=search_func,\n args_schema=self.SearchAPISchema,\n )\n\n self.status = f\"Search API Tool created with engine: {self.engine}\"\n return tool\n\n def run_model(self) -> list[Data]:\n tool = self.build_tool()\n results = tool.run(\n {\n \"query\": self.input_value,\n \"params\": self.search_params or {},\n \"max_results\": self.max_results,\n \"max_snippet_length\": self.max_snippet_length,\n }\n )\n\n data_list = [Data(data=result, text=result.get(\"snippet\", \"\")) for result in results]\n\n self.status = data_list\n return data_list\n" }, "engine": { "advanced": false, diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Travel Planning Agents.json b/src/backend/base/langflow/initial_setup/starter_projects/Travel Planning Agents.json index b603ef99426d..251a2c550ab8 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Travel Planning Agents.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Travel Planning Agents.json @@ -1469,7 +1469,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from typing import Any, Dict, List, Optional\n\nfrom langchain.tools import StructuredTool\nfrom langchain_community.utilities.searchapi import SearchApiAPIWrapper\nfrom pydantic import BaseModel, Field\n\nfrom langflow.base.langchain_utilities.model import LCToolComponent\nfrom langflow.field_typing import Tool\nfrom langflow.inputs import DictInput, IntInput, MessageTextInput, MultilineInput, SecretStrInput\nfrom langflow.schema import Data\n\n\nclass SearchAPIComponent(LCToolComponent):\n display_name: str = \"Search API\"\n description: str = \"Call the searchapi.io API with result limiting\"\n name = \"SearchAPI\"\n documentation: str = \"https://www.searchapi.io/docs/google\"\n\n inputs = [\n MessageTextInput(name=\"engine\", display_name=\"Engine\", value=\"google\"),\n SecretStrInput(name=\"api_key\", display_name=\"SearchAPI API Key\", required=True),\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input\",\n ),\n DictInput(name=\"search_params\", display_name=\"Search parameters\", advanced=True, is_list=True),\n IntInput(name=\"max_results\", display_name=\"Max Results\", value=5, advanced=True),\n IntInput(name=\"max_snippet_length\", display_name=\"Max Snippet Length\", value=100, advanced=True),\n ]\n\n class SearchAPISchema(BaseModel):\n query: str = Field(..., description=\"The search query\")\n params: Optional[Dict[str, Any]] = Field(default_factory=dict, description=\"Additional search parameters\")\n max_results: int = Field(5, description=\"Maximum number of results to return\")\n max_snippet_length: int = Field(100, description=\"Maximum length of each result snippet\")\n\n def _build_wrapper(self):\n return SearchApiAPIWrapper(engine=self.engine, searchapi_api_key=self.api_key)\n\n def build_tool(self) -> Tool:\n wrapper = self._build_wrapper()\n\n def search_func(\n query: str, params: Optional[Dict[str, Any]] = None, max_results: int = 5, max_snippet_length: int = 100\n ) -> List[Dict[str, Any]]:\n params = params or {}\n full_results = wrapper.results(query=query, **params)\n organic_results = full_results.get(\"organic_results\", [])[:max_results]\n\n limited_results = []\n for result in organic_results:\n limited_result = {\n \"title\": result.get(\"title\", \"\")[:max_snippet_length],\n \"link\": result.get(\"link\", \"\"),\n \"snippet\": result.get(\"snippet\", \"\")[:max_snippet_length],\n }\n limited_results.append(limited_result)\n\n return limited_results\n\n tool = StructuredTool.from_function(\n name=\"search_api\",\n description=\"Search for recent results using searchapi.io with result limiting\",\n func=search_func,\n args_schema=self.SearchAPISchema,\n )\n\n self.status = f\"Search API Tool created with engine: {self.engine}\"\n return tool\n\n def run_model(self) -> List[Data]:\n tool = self.build_tool()\n results = tool.run(\n {\n \"query\": self.input_value,\n \"params\": self.search_params or {},\n \"max_results\": self.max_results,\n \"max_snippet_length\": self.max_snippet_length,\n }\n )\n\n data_list = [Data(data=result, text=result.get(\"snippet\", \"\")) for result in results]\n\n self.status = data_list\n return data_list\n" + "value": "from typing import Any\n\nfrom langchain.tools import StructuredTool\nfrom langchain_community.utilities.searchapi import SearchApiAPIWrapper\nfrom pydantic import BaseModel, Field\n\nfrom langflow.base.langchain_utilities.model import LCToolComponent\nfrom langflow.field_typing import Tool\nfrom langflow.inputs import DictInput, IntInput, MessageTextInput, MultilineInput, SecretStrInput\nfrom langflow.schema import Data\n\n\nclass SearchAPIComponent(LCToolComponent):\n display_name: str = \"Search API\"\n description: str = \"Call the searchapi.io API with result limiting\"\n name = \"SearchAPI\"\n documentation: str = \"https://www.searchapi.io/docs/google\"\n\n inputs = [\n MessageTextInput(name=\"engine\", display_name=\"Engine\", value=\"google\"),\n SecretStrInput(name=\"api_key\", display_name=\"SearchAPI API Key\", required=True),\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input\",\n ),\n DictInput(name=\"search_params\", display_name=\"Search parameters\", advanced=True, is_list=True),\n IntInput(name=\"max_results\", display_name=\"Max Results\", value=5, advanced=True),\n IntInput(name=\"max_snippet_length\", display_name=\"Max Snippet Length\", value=100, advanced=True),\n ]\n\n class SearchAPISchema(BaseModel):\n query: str = Field(..., description=\"The search query\")\n params: dict[str, Any] | None = Field(default_factory=dict, description=\"Additional search parameters\")\n max_results: int = Field(5, description=\"Maximum number of results to return\")\n max_snippet_length: int = Field(100, description=\"Maximum length of each result snippet\")\n\n def _build_wrapper(self):\n return SearchApiAPIWrapper(engine=self.engine, searchapi_api_key=self.api_key)\n\n def build_tool(self) -> Tool:\n wrapper = self._build_wrapper()\n\n def search_func(\n query: str, params: dict[str, Any] | None = None, max_results: int = 5, max_snippet_length: int = 100\n ) -> list[dict[str, Any]]:\n params = params or {}\n full_results = wrapper.results(query=query, **params)\n organic_results = full_results.get(\"organic_results\", [])[:max_results]\n\n limited_results = []\n for result in organic_results:\n limited_result = {\n \"title\": result.get(\"title\", \"\")[:max_snippet_length],\n \"link\": result.get(\"link\", \"\"),\n \"snippet\": result.get(\"snippet\", \"\")[:max_snippet_length],\n }\n limited_results.append(limited_result)\n\n return limited_results\n\n tool = StructuredTool.from_function(\n name=\"search_api\",\n description=\"Search for recent results using searchapi.io with result limiting\",\n func=search_func,\n args_schema=self.SearchAPISchema,\n )\n\n self.status = f\"Search API Tool created with engine: {self.engine}\"\n return tool\n\n def run_model(self) -> list[Data]:\n tool = self.build_tool()\n results = tool.run(\n {\n \"query\": self.input_value,\n \"params\": self.search_params or {},\n \"max_results\": self.max_results,\n \"max_snippet_length\": self.max_snippet_length,\n }\n )\n\n data_list = [Data(data=result, text=result.get(\"snippet\", \"\")) for result in results]\n\n self.status = data_list\n return data_list\n" }, "engine": { "_input_type": "MessageTextInput", @@ -2310,7 +2310,7 @@ "show": true, "title_case": false, "type": "code", - "value": "import ast\nimport operator\nfrom typing import List\n\nfrom langchain.tools import StructuredTool\nfrom pydantic import BaseModel, Field\n\nfrom langflow.base.langchain_utilities.model import LCToolComponent\nfrom langflow.field_typing import Tool\nfrom langflow.inputs import MessageTextInput\nfrom langflow.schema import Data\n\n\nclass CalculatorToolComponent(LCToolComponent):\n display_name = \"Calculator\"\n description = \"Perform basic arithmetic operations on a given expression.\"\n icon = \"calculator\"\n name = \"CalculatorTool\"\n\n inputs = [\n MessageTextInput(\n name=\"expression\",\n display_name=\"Expression\",\n info=\"The arithmetic expression to evaluate (e.g., '4*4*(33/22)+12-20').\",\n ),\n ]\n\n class CalculatorToolSchema(BaseModel):\n expression: str = Field(..., description=\"The arithmetic expression to evaluate.\")\n\n def run_model(self) -> List[Data]:\n return self._evaluate_expression(self.expression)\n\n def build_tool(self) -> Tool:\n return StructuredTool.from_function(\n name=\"calculator\",\n description=\"Evaluate basic arithmetic expressions. Input should be a string containing the expression.\",\n func=self._evaluate_expression,\n args_schema=self.CalculatorToolSchema,\n )\n\n def _evaluate_expression(self, expression: str) -> List[Data]:\n try:\n # Define the allowed operators\n operators = {\n ast.Add: operator.add,\n ast.Sub: operator.sub,\n ast.Mult: operator.mul,\n ast.Div: operator.truediv,\n ast.Pow: operator.pow,\n }\n\n def eval_expr(node):\n if isinstance(node, ast.Num):\n return node.n\n elif isinstance(node, ast.BinOp):\n return operators[type(node.op)](eval_expr(node.left), eval_expr(node.right))\n elif isinstance(node, ast.UnaryOp):\n return operators[type(node.op)](eval_expr(node.operand))\n else:\n raise TypeError(node)\n\n # Parse the expression and evaluate it\n tree = ast.parse(expression, mode=\"eval\")\n result = eval_expr(tree.body)\n\n # Format the result to a reasonable number of decimal places\n formatted_result = f\"{result:.6f}\".rstrip(\"0\").rstrip(\".\")\n\n self.status = formatted_result\n return [Data(data={\"result\": formatted_result})]\n\n except (SyntaxError, TypeError, KeyError) as e:\n error_message = f\"Invalid expression: {str(e)}\"\n self.status = error_message\n return [Data(data={\"error\": error_message})]\n except ZeroDivisionError:\n error_message = \"Error: Division by zero\"\n self.status = error_message\n return [Data(data={\"error\": error_message})]\n except Exception as e:\n error_message = f\"Error: {str(e)}\"\n self.status = error_message\n return [Data(data={\"error\": error_message})]\n" + "value": "import ast\nimport operator\n\nfrom langchain.tools import StructuredTool\nfrom pydantic import BaseModel, Field\n\nfrom langflow.base.langchain_utilities.model import LCToolComponent\nfrom langflow.field_typing import Tool\nfrom langflow.inputs import MessageTextInput\nfrom langflow.schema import Data\n\n\nclass CalculatorToolComponent(LCToolComponent):\n display_name = \"Calculator\"\n description = \"Perform basic arithmetic operations on a given expression.\"\n icon = \"calculator\"\n name = \"CalculatorTool\"\n\n inputs = [\n MessageTextInput(\n name=\"expression\",\n display_name=\"Expression\",\n info=\"The arithmetic expression to evaluate (e.g., '4*4*(33/22)+12-20').\",\n ),\n ]\n\n class CalculatorToolSchema(BaseModel):\n expression: str = Field(..., description=\"The arithmetic expression to evaluate.\")\n\n def run_model(self) -> list[Data]:\n return self._evaluate_expression(self.expression)\n\n def build_tool(self) -> Tool:\n return StructuredTool.from_function(\n name=\"calculator\",\n description=\"Evaluate basic arithmetic expressions. Input should be a string containing the expression.\",\n func=self._evaluate_expression,\n args_schema=self.CalculatorToolSchema,\n )\n\n def _evaluate_expression(self, expression: str) -> list[Data]:\n try:\n # Define the allowed operators\n operators = {\n ast.Add: operator.add,\n ast.Sub: operator.sub,\n ast.Mult: operator.mul,\n ast.Div: operator.truediv,\n ast.Pow: operator.pow,\n }\n\n def eval_expr(node):\n if isinstance(node, ast.Num):\n return node.n\n elif isinstance(node, ast.BinOp):\n return operators[type(node.op)](eval_expr(node.left), eval_expr(node.right))\n elif isinstance(node, ast.UnaryOp):\n return operators[type(node.op)](eval_expr(node.operand))\n else:\n raise TypeError(node)\n\n # Parse the expression and evaluate it\n tree = ast.parse(expression, mode=\"eval\")\n result = eval_expr(tree.body)\n\n # Format the result to a reasonable number of decimal places\n formatted_result = f\"{result:.6f}\".rstrip(\"0\").rstrip(\".\")\n\n self.status = formatted_result\n return [Data(data={\"result\": formatted_result})]\n\n except (SyntaxError, TypeError, KeyError) as e:\n error_message = f\"Invalid expression: {str(e)}\"\n self.status = error_message\n return [Data(data={\"error\": error_message})]\n except ZeroDivisionError:\n error_message = \"Error: Division by zero\"\n self.status = error_message\n return [Data(data={\"error\": error_message})]\n except Exception as e:\n error_message = f\"Error: {str(e)}\"\n self.status = error_message\n return [Data(data={\"error\": error_message})]\n" }, "expression": { "_input_type": "MessageTextInput", diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json index 55ca7a71701d..1bf063006705 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json @@ -656,7 +656,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from loguru import logger\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import DictInput, FloatInput, MessageTextInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n HandleInput,\n IntInput,\n MultilineInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema import Data\n\n\nclass AstraVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Implementation of Vector Store using Astra DB with search capabilities\"\n documentation: str = \"https://python.langchain.com/docs/integrations/vectorstores/astradb\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n VECTORIZE_PROVIDERS_MAPPING = {\n \"Azure OpenAI\": [\"azureOpenAI\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Hugging Face - Dedicated\": [\"huggingfaceDedicated\", [\"endpoint-defined-model\"]],\n \"Hugging Face - Serverless\": [\n \"huggingface\",\n [\n \"sentence-transformers/all-MiniLM-L6-v2\",\n \"intfloat/multilingual-e5-large\",\n \"intfloat/multilingual-e5-large-instruct\",\n \"BAAI/bge-small-en-v1.5\",\n \"BAAI/bge-base-en-v1.5\",\n \"BAAI/bge-large-en-v1.5\",\n ],\n ],\n \"Jina AI\": [\n \"jinaAI\",\n [\n \"jina-embeddings-v2-base-en\",\n \"jina-embeddings-v2-base-de\",\n \"jina-embeddings-v2-base-es\",\n \"jina-embeddings-v2-base-code\",\n \"jina-embeddings-v2-base-zh\",\n ],\n ],\n \"Mistral AI\": [\"mistral\", [\"mistral-embed\"]],\n \"NVIDIA\": [\"nvidia\", [\"NV-Embed-QA\"]],\n \"OpenAI\": [\"openai\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Upstage\": [\"upstageAI\", [\"solar-embedding-1-large\"]],\n \"Voyage AI\": [\n \"voyageAI\",\n [\"voyage-large-2-instruct\", \"voyage-law-2\", \"voyage-code-2\", \"voyage-large-2\", \"voyage-2\"],\n ],\n }\n\n inputs = [\n StrInput(\n name=\"collection_name\",\n display_name=\"Collection Name\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n ),\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n ),\n SecretStrInput(\n name=\"api_endpoint\",\n display_name=\"API Endpoint\",\n info=\"API endpoint URL for the Astra DB service.\",\n value=\"ASTRA_DB_API_ENDPOINT\",\n required=True,\n ),\n MultilineInput(\n name=\"search_input\",\n display_name=\"Search Input\",\n ),\n DataInput(\n name=\"ingest_data\",\n display_name=\"Ingest Data\",\n is_list=True,\n ),\n StrInput(\n name=\"namespace\",\n display_name=\"Namespace\",\n info=\"Optional namespace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"embedding_service\",\n display_name=\"Embedding Model or Astra Vectorize\",\n info=\"Determines whether to use Astra Vectorize for the collection.\",\n options=[\"Embedding Model\", \"Astra Vectorize\"],\n real_time_refresh=True,\n value=\"Embedding Model\",\n ),\n HandleInput(\n name=\"embedding\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ),\n DropdownInput(\n name=\"metric\",\n display_name=\"Metric\",\n info=\"Optional distance metric for vector comparisons in the vector store.\",\n options=[\"cosine\", \"dot_product\", \"euclidean\"],\n advanced=True,\n ),\n IntInput(\n name=\"batch_size\",\n display_name=\"Batch Size\",\n info=\"Optional number of data to process in a single batch.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_batch_concurrency\",\n display_name=\"Bulk Insert Batch Concurrency\",\n info=\"Optional concurrency level for bulk insert operations.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_overwrite_concurrency\",\n display_name=\"Bulk Insert Overwrite Concurrency\",\n info=\"Optional concurrency level for bulk insert operations that overwrite existing data.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_delete_concurrency\",\n display_name=\"Bulk Delete Concurrency\",\n info=\"Optional concurrency level for bulk delete operations.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"setup_mode\",\n display_name=\"Setup Mode\",\n info=\"Configuration mode for setting up the vector store, with options like 'Sync', 'Async', or 'Off'.\",\n options=[\"Sync\", \"Async\", \"Off\"],\n advanced=True,\n value=\"Sync\",\n ),\n BoolInput(\n name=\"pre_delete_collection\",\n display_name=\"Pre Delete Collection\",\n info=\"Boolean flag to determine whether to delete the collection before creating a new one.\",\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_include\",\n display_name=\"Metadata Indexing Include\",\n info=\"Optional list of metadata fields to include in the indexing.\",\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_exclude\",\n display_name=\"Metadata Indexing Exclude\",\n info=\"Optional list of metadata fields to exclude from the indexing.\",\n advanced=True,\n ),\n StrInput(\n name=\"collection_indexing_policy\",\n display_name=\"Collection Indexing Policy\",\n info=\"Optional dictionary defining the indexing policy for the collection.\",\n advanced=True,\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Results\",\n info=\"Number of results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. (when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n DictInput(\n name=\"search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n is_list=True,\n ),\n ]\n\n @check_cached_vector_store\n def insert_in_dict(self, build_config, field_name, new_parameters):\n # Insert the new key-value pair after the found key\n for new_field_name, new_parameter in new_parameters.items():\n # Get all the items as a list of tuples (key, value)\n items = list(build_config.items())\n\n # Find the index of the key to insert after\n for i, (key, value) in enumerate(items):\n if key == field_name:\n break\n\n items.insert(i + 1, (new_field_name, new_parameter))\n\n # Clear the original dictionary and update with the modified items\n build_config.clear()\n build_config.update(items)\n\n return build_config\n\n def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n if field_name == \"embedding_service\":\n if field_value == \"Astra Vectorize\":\n for field in [\"embedding\"]:\n if field in build_config:\n del build_config[field]\n\n new_parameter = DropdownInput(\n name=\"provider\",\n display_name=\"Vectorize Provider\",\n options=self.VECTORIZE_PROVIDERS_MAPPING.keys(),\n value=\"\",\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_service\", {\"provider\": new_parameter})\n else:\n for field in [\n \"provider\",\n \"z_00_model_name\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if field in build_config:\n del build_config[field]\n\n new_parameter = HandleInput(\n name=\"embedding\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_service\", {\"embedding\": new_parameter})\n\n elif field_name == \"provider\":\n for field in [\n \"z_00_model_name\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if field in build_config:\n del build_config[field]\n\n model_options = self.VECTORIZE_PROVIDERS_MAPPING[field_value][1]\n\n new_parameter_0 = DropdownInput(\n name=\"z_00_model_name\",\n display_name=\"Model Name\",\n info=f\"The embedding model to use for the selected provider. Each provider has a different set of models \"\n f\"available (full list at https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\\n\\n{', '.join(model_options)}\",\n options=model_options,\n required=True,\n ).to_dict()\n\n new_parameter_1 = DictInput(\n name=\"z_01_model_parameters\",\n display_name=\"Model Parameters\",\n is_list=True,\n ).to_dict()\n\n new_parameter_2 = MessageTextInput(\n name=\"z_02_api_key_name\",\n display_name=\"API Key name\",\n info=\"The name of the embeddings provider API key stored on Astra. If set, it will override the 'ProviderKey' in the authentication parameters.\",\n ).to_dict()\n\n new_parameter_3 = SecretStrInput(\n name=\"z_03_provider_api_key\",\n display_name=\"Provider API Key\",\n info=\"An alternative to the Astra Authentication that passes an API key for the provider with each request to Astra DB. This may be used when Vectorize is configured for the collection, but no corresponding provider secret is stored within Astra's key management system.\",\n ).to_dict()\n\n new_parameter_4 = DictInput(\n name=\"z_04_authentication\",\n display_name=\"Authentication parameters\",\n is_list=True,\n ).to_dict()\n\n self.insert_in_dict(\n build_config,\n \"provider\",\n {\n \"z_00_model_name\": new_parameter_0,\n \"z_01_model_parameters\": new_parameter_1,\n \"z_02_api_key_name\": new_parameter_2,\n \"z_03_provider_api_key\": new_parameter_3,\n \"z_04_authentication\": new_parameter_4,\n },\n )\n\n return build_config\n\n def build_vectorize_options(self, **kwargs):\n for attribute in [\n \"provider\",\n \"z_00_api_key_name\",\n \"z_01_model_name\",\n \"z_02_authentication\",\n \"z_03_provider_api_key\",\n \"z_04_model_parameters\",\n ]:\n if not hasattr(self, attribute):\n setattr(self, attribute, None)\n\n # Fetch values from kwargs if any self.* attributes are None\n provider_value = self.VECTORIZE_PROVIDERS_MAPPING.get(self.provider, [None])[0] or kwargs.get(\"provider\")\n authentication = {**(self.z_02_authentication or kwargs.get(\"z_02_authentication\", {}))}\n\n api_key_name = self.z_00_api_key_name or kwargs.get(\"z_00_api_key_name\")\n provider_key_name = self.z_03_provider_api_key or kwargs.get(\"z_03_provider_api_key\")\n if provider_key_name:\n authentication[\"providerKey\"] = provider_key_name\n if api_key_name:\n authentication[\"providerKey\"] = api_key_name\n\n return {\n # must match astrapy.info.CollectionVectorServiceOptions\n \"collection_vector_service_options\": {\n \"provider\": provider_value,\n \"modelName\": self.z_01_model_name or kwargs.get(\"z_01_model_name\"),\n \"authentication\": authentication,\n \"parameters\": self.z_04_model_parameters or kwargs.get(\"z_04_model_parameters\", {}),\n },\n \"collection_embedding_api_key\": self.z_03_provider_api_key or kwargs.get(\"z_03_provider_api_key\"),\n }\n\n @check_cached_vector_store\n def build_vector_store(self, vectorize_options=None):\n try:\n from langchain_astradb import AstraDBVectorStore\n from langchain_astradb.utils.astradb import SetupMode\n except ImportError:\n raise ImportError(\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n\n try:\n if not self.setup_mode:\n self.setup_mode = self._inputs[\"setup_mode\"].options[0]\n\n setup_mode_value = SetupMode[self.setup_mode.upper()]\n except KeyError:\n raise ValueError(f\"Invalid setup mode: {self.setup_mode}\")\n\n if self.embedding:\n embedding_dict = {\"embedding\": self.embedding}\n else:\n from astrapy.info import CollectionVectorServiceOptions\n\n dict_options = vectorize_options or self.build_vectorize_options()\n dict_options[\"authentication\"] = {\n k: v for k, v in dict_options.get(\"authentication\", {}).items() if k and v\n }\n dict_options[\"parameters\"] = {k: v for k, v in dict_options.get(\"parameters\", {}).items() if k and v}\n\n embedding_dict = {\n \"collection_vector_service_options\": CollectionVectorServiceOptions.from_dict(\n dict_options.get(\"collection_vector_service_options\", {})\n ),\n }\n\n vector_store_kwargs = {\n **embedding_dict,\n \"collection_name\": self.collection_name,\n \"token\": self.token,\n \"api_endpoint\": self.api_endpoint,\n \"namespace\": self.namespace or None,\n \"metric\": self.metric or None,\n \"batch_size\": self.batch_size or None,\n \"bulk_insert_batch_concurrency\": self.bulk_insert_batch_concurrency or None,\n \"bulk_insert_overwrite_concurrency\": self.bulk_insert_overwrite_concurrency or None,\n \"bulk_delete_concurrency\": self.bulk_delete_concurrency or None,\n \"setup_mode\": setup_mode_value,\n \"pre_delete_collection\": self.pre_delete_collection or False,\n }\n\n if self.metadata_indexing_include:\n vector_store_kwargs[\"metadata_indexing_include\"] = self.metadata_indexing_include\n elif self.metadata_indexing_exclude:\n vector_store_kwargs[\"metadata_indexing_exclude\"] = self.metadata_indexing_exclude\n elif self.collection_indexing_policy:\n vector_store_kwargs[\"collection_indexing_policy\"] = self.collection_indexing_policy\n\n try:\n vector_store = AstraDBVectorStore(**vector_store_kwargs)\n except Exception as e:\n raise ValueError(f\"Error initializing AstraDBVectorStore: {str(e)}\") from e\n\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store):\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n raise ValueError(\"Vector Store Inputs must be Data objects.\")\n\n if documents:\n logger.debug(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n raise ValueError(f\"Error adding documents to AstraDBVectorStore: {str(e)}\") from e\n else:\n logger.debug(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self):\n if self.search_type == \"Similarity with score threshold\":\n return \"similarity_score_threshold\"\n elif self.search_type == \"MMR (Max Marginal Relevance)\":\n return \"mmr\"\n else:\n return \"similarity\"\n\n def _build_search_args(self):\n args = {\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n\n if self.search_filter:\n clean_filter = {k: v for k, v in self.search_filter.items() if k and v}\n if len(clean_filter) > 0:\n args[\"filter\"] = clean_filter\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n if not vector_store:\n vector_store = self.build_vector_store()\n\n logger.debug(f\"Search input: {self.search_input}\")\n logger.debug(f\"Search type: {self.search_type}\")\n logger.debug(f\"Number of results: {self.number_of_results}\")\n\n if self.search_input and isinstance(self.search_input, str) and self.search_input.strip():\n try:\n search_type = self._map_search_type()\n search_args = self._build_search_args()\n\n docs = vector_store.search(query=self.search_input, search_type=search_type, **search_args)\n except Exception as e:\n raise ValueError(f\"Error performing search in AstraDBVectorStore: {str(e)}\") from e\n\n logger.debug(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n logger.debug(f\"Converted documents to data: {len(data)}\")\n self.status = data\n return data\n else:\n logger.debug(\"No search input provided. Skipping search.\")\n return []\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" + "value": "import os\n\nfrom loguru import logger\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import DictInput, FloatInput, MessageTextInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n HandleInput,\n IntInput,\n MultilineInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema import Data\n\n\nclass AstraVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Implementation of Vector Store using Astra DB with search capabilities\"\n documentation: str = \"https://python.langchain.com/docs/integrations/vectorstores/astradb\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n VECTORIZE_PROVIDERS_MAPPING = {\n \"Azure OpenAI\": [\"azureOpenAI\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Hugging Face - Dedicated\": [\"huggingfaceDedicated\", [\"endpoint-defined-model\"]],\n \"Hugging Face - Serverless\": [\n \"huggingface\",\n [\n \"sentence-transformers/all-MiniLM-L6-v2\",\n \"intfloat/multilingual-e5-large\",\n \"intfloat/multilingual-e5-large-instruct\",\n \"BAAI/bge-small-en-v1.5\",\n \"BAAI/bge-base-en-v1.5\",\n \"BAAI/bge-large-en-v1.5\",\n ],\n ],\n \"Jina AI\": [\n \"jinaAI\",\n [\n \"jina-embeddings-v2-base-en\",\n \"jina-embeddings-v2-base-de\",\n \"jina-embeddings-v2-base-es\",\n \"jina-embeddings-v2-base-code\",\n \"jina-embeddings-v2-base-zh\",\n ],\n ],\n \"Mistral AI\": [\"mistral\", [\"mistral-embed\"]],\n \"NVIDIA\": [\"nvidia\", [\"NV-Embed-QA\"]],\n \"OpenAI\": [\"openai\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Upstage\": [\"upstageAI\", [\"solar-embedding-1-large\"]],\n \"Voyage AI\": [\n \"voyageAI\",\n [\"voyage-large-2-instruct\", \"voyage-law-2\", \"voyage-code-2\", \"voyage-large-2\", \"voyage-2\"],\n ],\n }\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n advanced=os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\",\n ),\n SecretStrInput(\n name=\"api_endpoint\",\n display_name=\"API Endpoint\",\n info=\"API endpoint URL for the Astra DB service.\",\n value=\"ASTRA_DB_API_ENDPOINT\",\n required=True,\n ),\n StrInput(\n name=\"collection_name\",\n display_name=\"Collection Name\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n ),\n MultilineInput(\n name=\"search_input\",\n display_name=\"Search Input\",\n ),\n DataInput(\n name=\"ingest_data\",\n display_name=\"Ingest Data\",\n is_list=True,\n ),\n StrInput(\n name=\"namespace\",\n display_name=\"Namespace\",\n info=\"Optional namespace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"embedding_service\",\n display_name=\"Embedding Model or Astra Vectorize\",\n info=\"Determines whether to use Astra Vectorize for the collection.\",\n options=[\"Embedding Model\", \"Astra Vectorize\"],\n real_time_refresh=True,\n value=\"Embedding Model\",\n ),\n HandleInput(\n name=\"embedding\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ),\n DropdownInput(\n name=\"metric\",\n display_name=\"Metric\",\n info=\"Optional distance metric for vector comparisons in the vector store.\",\n options=[\"cosine\", \"dot_product\", \"euclidean\"],\n advanced=True,\n ),\n IntInput(\n name=\"batch_size\",\n display_name=\"Batch Size\",\n info=\"Optional number of data to process in a single batch.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_batch_concurrency\",\n display_name=\"Bulk Insert Batch Concurrency\",\n info=\"Optional concurrency level for bulk insert operations.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_overwrite_concurrency\",\n display_name=\"Bulk Insert Overwrite Concurrency\",\n info=\"Optional concurrency level for bulk insert operations that overwrite existing data.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_delete_concurrency\",\n display_name=\"Bulk Delete Concurrency\",\n info=\"Optional concurrency level for bulk delete operations.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"setup_mode\",\n display_name=\"Setup Mode\",\n info=\"Configuration mode for setting up the vector store, with options like 'Sync', 'Async', or 'Off'.\",\n options=[\"Sync\", \"Async\", \"Off\"],\n advanced=True,\n value=\"Sync\",\n ),\n BoolInput(\n name=\"pre_delete_collection\",\n display_name=\"Pre Delete Collection\",\n info=\"Boolean flag to determine whether to delete the collection before creating a new one.\",\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_include\",\n display_name=\"Metadata Indexing Include\",\n info=\"Optional list of metadata fields to include in the indexing.\",\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_exclude\",\n display_name=\"Metadata Indexing Exclude\",\n info=\"Optional list of metadata fields to exclude from the indexing.\",\n advanced=True,\n ),\n StrInput(\n name=\"collection_indexing_policy\",\n display_name=\"Collection Indexing Policy\",\n info=\"Optional dictionary defining the indexing policy for the collection.\",\n advanced=True,\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Results\",\n info=\"Number of results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. (when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n DictInput(\n name=\"search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n is_list=True,\n ),\n ]\n\n @check_cached_vector_store\n def insert_in_dict(self, build_config, field_name, new_parameters):\n # Insert the new key-value pair after the found key\n for new_field_name, new_parameter in new_parameters.items():\n # Get all the items as a list of tuples (key, value)\n items = list(build_config.items())\n\n # Find the index of the key to insert after\n for i, (key, value) in enumerate(items):\n if key == field_name:\n break\n\n items.insert(i + 1, (new_field_name, new_parameter))\n\n # Clear the original dictionary and update with the modified items\n build_config.clear()\n build_config.update(items)\n\n return build_config\n\n def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n if field_name == \"embedding_service\":\n if field_value == \"Astra Vectorize\":\n for field in [\"embedding\"]:\n if field in build_config:\n del build_config[field]\n\n new_parameter = DropdownInput(\n name=\"provider\",\n display_name=\"Vectorize Provider\",\n options=self.VECTORIZE_PROVIDERS_MAPPING.keys(),\n value=\"\",\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_service\", {\"provider\": new_parameter})\n else:\n for field in [\n \"provider\",\n \"z_00_model_name\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if field in build_config:\n del build_config[field]\n\n new_parameter = HandleInput(\n name=\"embedding\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_service\", {\"embedding\": new_parameter})\n\n elif field_name == \"provider\":\n for field in [\n \"z_00_model_name\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if field in build_config:\n del build_config[field]\n\n model_options = self.VECTORIZE_PROVIDERS_MAPPING[field_value][1]\n\n new_parameter_0 = DropdownInput(\n name=\"z_00_model_name\",\n display_name=\"Model Name\",\n info=f\"The embedding model to use for the selected provider. Each provider has a different set of models \"\n f\"available (full list at https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\\n\\n{', '.join(model_options)}\",\n options=model_options,\n required=True,\n ).to_dict()\n\n new_parameter_1 = DictInput(\n name=\"z_01_model_parameters\",\n display_name=\"Model Parameters\",\n is_list=True,\n ).to_dict()\n\n new_parameter_2 = MessageTextInput(\n name=\"z_02_api_key_name\",\n display_name=\"API Key name\",\n info=\"The name of the embeddings provider API key stored on Astra. If set, it will override the 'ProviderKey' in the authentication parameters.\",\n ).to_dict()\n\n new_parameter_3 = SecretStrInput(\n name=\"z_03_provider_api_key\",\n display_name=\"Provider API Key\",\n info=\"An alternative to the Astra Authentication that passes an API key for the provider with each request to Astra DB. This may be used when Vectorize is configured for the collection, but no corresponding provider secret is stored within Astra's key management system.\",\n ).to_dict()\n\n new_parameter_4 = DictInput(\n name=\"z_04_authentication\",\n display_name=\"Authentication parameters\",\n is_list=True,\n ).to_dict()\n\n self.insert_in_dict(\n build_config,\n \"provider\",\n {\n \"z_00_model_name\": new_parameter_0,\n \"z_01_model_parameters\": new_parameter_1,\n \"z_02_api_key_name\": new_parameter_2,\n \"z_03_provider_api_key\": new_parameter_3,\n \"z_04_authentication\": new_parameter_4,\n },\n )\n\n return build_config\n\n def build_vectorize_options(self, **kwargs):\n for attribute in [\n \"provider\",\n \"z_00_model_name\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if not hasattr(self, attribute):\n setattr(self, attribute, None)\n\n # Fetch values from kwargs if any self.* attributes are None\n provider_value = self.VECTORIZE_PROVIDERS_MAPPING.get(self.provider, [None])[0] or kwargs.get(\"provider\")\n authentication = {**(self.z_04_authentication or kwargs.get(\"z_04_authentication\", {}))}\n\n api_key_name = self.z_02_api_key_name or kwargs.get(\"z_02_api_key_name\")\n provider_key_name = self.z_03_provider_api_key or kwargs.get(\"z_03_provider_api_key\")\n if provider_key_name:\n authentication[\"providerKey\"] = provider_key_name\n if api_key_name:\n authentication[\"providerKey\"] = api_key_name\n\n return {\n # must match astrapy.info.CollectionVectorServiceOptions\n \"collection_vector_service_options\": {\n \"provider\": provider_value,\n \"modelName\": self.z_00_model_name or kwargs.get(\"z_00_model_name\"),\n \"authentication\": authentication,\n \"parameters\": self.z_01_model_parameters or kwargs.get(\"z_01_model_parameters\", {}),\n },\n \"collection_embedding_api_key\": self.z_03_provider_api_key or kwargs.get(\"z_03_provider_api_key\"),\n }\n\n @check_cached_vector_store\n def build_vector_store(self, vectorize_options=None):\n try:\n from langchain_astradb import AstraDBVectorStore\n from langchain_astradb.utils.astradb import SetupMode\n except ImportError:\n raise ImportError(\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n\n try:\n if not self.setup_mode:\n self.setup_mode = self._inputs[\"setup_mode\"].options[0]\n\n setup_mode_value = SetupMode[self.setup_mode.upper()]\n except KeyError:\n raise ValueError(f\"Invalid setup mode: {self.setup_mode}\")\n\n if self.embedding:\n embedding_dict = {\"embedding\": self.embedding}\n else:\n from astrapy.info import CollectionVectorServiceOptions\n\n dict_options = vectorize_options or self.build_vectorize_options()\n dict_options[\"authentication\"] = {\n k: v for k, v in dict_options.get(\"authentication\", {}).items() if k and v\n }\n dict_options[\"parameters\"] = {k: v for k, v in dict_options.get(\"parameters\", {}).items() if k and v}\n\n embedding_dict = {\n \"collection_vector_service_options\": CollectionVectorServiceOptions.from_dict(\n dict_options.get(\"collection_vector_service_options\", {})\n ),\n }\n\n vector_store_kwargs = {\n **embedding_dict,\n \"collection_name\": self.collection_name,\n \"token\": self.token,\n \"api_endpoint\": self.api_endpoint,\n \"namespace\": self.namespace or None,\n \"metric\": self.metric or None,\n \"batch_size\": self.batch_size or None,\n \"bulk_insert_batch_concurrency\": self.bulk_insert_batch_concurrency or None,\n \"bulk_insert_overwrite_concurrency\": self.bulk_insert_overwrite_concurrency or None,\n \"bulk_delete_concurrency\": self.bulk_delete_concurrency or None,\n \"setup_mode\": setup_mode_value,\n \"pre_delete_collection\": self.pre_delete_collection or False,\n }\n\n if self.metadata_indexing_include:\n vector_store_kwargs[\"metadata_indexing_include\"] = self.metadata_indexing_include\n elif self.metadata_indexing_exclude:\n vector_store_kwargs[\"metadata_indexing_exclude\"] = self.metadata_indexing_exclude\n elif self.collection_indexing_policy:\n vector_store_kwargs[\"collection_indexing_policy\"] = self.collection_indexing_policy\n\n try:\n vector_store = AstraDBVectorStore(**vector_store_kwargs)\n except Exception as e:\n raise ValueError(f\"Error initializing AstraDBVectorStore: {str(e)}\") from e\n\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store):\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n raise ValueError(\"Vector Store Inputs must be Data objects.\")\n\n if documents:\n logger.debug(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n raise ValueError(f\"Error adding documents to AstraDBVectorStore: {str(e)}\") from e\n else:\n logger.debug(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self):\n if self.search_type == \"Similarity with score threshold\":\n return \"similarity_score_threshold\"\n elif self.search_type == \"MMR (Max Marginal Relevance)\":\n return \"mmr\"\n else:\n return \"similarity\"\n\n def _build_search_args(self):\n args = {\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n\n if self.search_filter:\n clean_filter = {k: v for k, v in self.search_filter.items() if k and v}\n if len(clean_filter) > 0:\n args[\"filter\"] = clean_filter\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n if not vector_store:\n vector_store = self.build_vector_store()\n\n logger.debug(f\"Search input: {self.search_input}\")\n logger.debug(f\"Search type: {self.search_type}\")\n logger.debug(f\"Number of results: {self.number_of_results}\")\n\n if self.search_input and isinstance(self.search_input, str) and self.search_input.strip():\n try:\n search_type = self._map_search_type()\n search_args = self._build_search_args()\n\n docs = vector_store.search(query=self.search_input, search_type=search_type, **search_args)\n except Exception as e:\n raise ValueError(f\"Error performing search in AstraDBVectorStore: {str(e)}\") from e\n\n logger.debug(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n logger.debug(f\"Converted documents to data: {len(data)}\")\n self.status = data\n return data\n else:\n logger.debug(\"No search input provided. Skipping search.\")\n return []\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" }, "collection_indexing_policy": { "advanced": true, @@ -1528,7 +1528,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from typing import List\n\nfrom langchain_text_splitters import CharacterTextSplitter\n\nfrom langflow.custom import Component\nfrom langflow.io import HandleInput, IntInput, MessageTextInput, Output\nfrom langflow.schema import Data\nfrom langflow.utils.util import unescape_string\n\n\nclass SplitTextComponent(Component):\n display_name: str = \"Split Text\"\n description: str = \"Split text into chunks based on specified criteria.\"\n icon = \"scissors-line-dashed\"\n name = \"SplitText\"\n\n inputs = [\n HandleInput(\n name=\"data_inputs\",\n display_name=\"Data Inputs\",\n info=\"The data to split.\",\n input_types=[\"Data\"],\n is_list=True,\n ),\n IntInput(\n name=\"chunk_overlap\",\n display_name=\"Chunk Overlap\",\n info=\"Number of characters to overlap between chunks.\",\n value=200,\n ),\n IntInput(\n name=\"chunk_size\",\n display_name=\"Chunk Size\",\n info=\"The maximum number of characters in each chunk.\",\n value=1000,\n ),\n MessageTextInput(\n name=\"separator\",\n display_name=\"Separator\",\n info=\"The character to split on. Defaults to newline.\",\n value=\"\\n\",\n ),\n ]\n\n outputs = [\n Output(display_name=\"Chunks\", name=\"chunks\", method=\"split_text\"),\n ]\n\n def _docs_to_data(self, docs):\n data = []\n for doc in docs:\n data.append(Data(text=doc.page_content, data=doc.metadata))\n return data\n\n def split_text(self) -> List[Data]:\n separator = unescape_string(self.separator)\n\n documents = []\n for _input in self.data_inputs:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n\n splitter = CharacterTextSplitter(\n chunk_overlap=self.chunk_overlap,\n chunk_size=self.chunk_size,\n separator=separator,\n )\n docs = splitter.split_documents(documents)\n data = self._docs_to_data(docs)\n self.status = data\n return data\n" + "value": "from langchain_text_splitters import CharacterTextSplitter\n\nfrom langflow.custom import Component\nfrom langflow.io import HandleInput, IntInput, MessageTextInput, Output\nfrom langflow.schema import Data\nfrom langflow.utils.util import unescape_string\n\n\nclass SplitTextComponent(Component):\n display_name: str = \"Split Text\"\n description: str = \"Split text into chunks based on specified criteria.\"\n icon = \"scissors-line-dashed\"\n name = \"SplitText\"\n\n inputs = [\n HandleInput(\n name=\"data_inputs\",\n display_name=\"Data Inputs\",\n info=\"The data to split.\",\n input_types=[\"Data\"],\n is_list=True,\n ),\n IntInput(\n name=\"chunk_overlap\",\n display_name=\"Chunk Overlap\",\n info=\"Number of characters to overlap between chunks.\",\n value=200,\n ),\n IntInput(\n name=\"chunk_size\",\n display_name=\"Chunk Size\",\n info=\"The maximum number of characters in each chunk.\",\n value=1000,\n ),\n MessageTextInput(\n name=\"separator\",\n display_name=\"Separator\",\n info=\"The character to split on. Defaults to newline.\",\n value=\"\\n\",\n ),\n ]\n\n outputs = [\n Output(display_name=\"Chunks\", name=\"chunks\", method=\"split_text\"),\n ]\n\n def _docs_to_data(self, docs):\n data = []\n for doc in docs:\n data.append(Data(text=doc.page_content, data=doc.metadata))\n return data\n\n def split_text(self) -> list[Data]:\n separator = unescape_string(self.separator)\n\n documents = []\n for _input in self.data_inputs:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n\n splitter = CharacterTextSplitter(\n chunk_overlap=self.chunk_overlap,\n chunk_size=self.chunk_size,\n separator=separator,\n )\n docs = splitter.split_documents(documents)\n data = self._docs_to_data(docs)\n self.status = data\n return data\n" }, "data_inputs": { "advanced": false, @@ -1890,7 +1890,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from loguru import logger\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import DictInput, FloatInput, MessageTextInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n HandleInput,\n IntInput,\n MultilineInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema import Data\n\n\nclass AstraVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Implementation of Vector Store using Astra DB with search capabilities\"\n documentation: str = \"https://python.langchain.com/docs/integrations/vectorstores/astradb\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n VECTORIZE_PROVIDERS_MAPPING = {\n \"Azure OpenAI\": [\"azureOpenAI\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Hugging Face - Dedicated\": [\"huggingfaceDedicated\", [\"endpoint-defined-model\"]],\n \"Hugging Face - Serverless\": [\n \"huggingface\",\n [\n \"sentence-transformers/all-MiniLM-L6-v2\",\n \"intfloat/multilingual-e5-large\",\n \"intfloat/multilingual-e5-large-instruct\",\n \"BAAI/bge-small-en-v1.5\",\n \"BAAI/bge-base-en-v1.5\",\n \"BAAI/bge-large-en-v1.5\",\n ],\n ],\n \"Jina AI\": [\n \"jinaAI\",\n [\n \"jina-embeddings-v2-base-en\",\n \"jina-embeddings-v2-base-de\",\n \"jina-embeddings-v2-base-es\",\n \"jina-embeddings-v2-base-code\",\n \"jina-embeddings-v2-base-zh\",\n ],\n ],\n \"Mistral AI\": [\"mistral\", [\"mistral-embed\"]],\n \"NVIDIA\": [\"nvidia\", [\"NV-Embed-QA\"]],\n \"OpenAI\": [\"openai\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Upstage\": [\"upstageAI\", [\"solar-embedding-1-large\"]],\n \"Voyage AI\": [\n \"voyageAI\",\n [\"voyage-large-2-instruct\", \"voyage-law-2\", \"voyage-code-2\", \"voyage-large-2\", \"voyage-2\"],\n ],\n }\n\n inputs = [\n StrInput(\n name=\"collection_name\",\n display_name=\"Collection Name\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n ),\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n ),\n SecretStrInput(\n name=\"api_endpoint\",\n display_name=\"API Endpoint\",\n info=\"API endpoint URL for the Astra DB service.\",\n value=\"ASTRA_DB_API_ENDPOINT\",\n required=True,\n ),\n MultilineInput(\n name=\"search_input\",\n display_name=\"Search Input\",\n ),\n DataInput(\n name=\"ingest_data\",\n display_name=\"Ingest Data\",\n is_list=True,\n ),\n StrInput(\n name=\"namespace\",\n display_name=\"Namespace\",\n info=\"Optional namespace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"embedding_service\",\n display_name=\"Embedding Model or Astra Vectorize\",\n info=\"Determines whether to use Astra Vectorize for the collection.\",\n options=[\"Embedding Model\", \"Astra Vectorize\"],\n real_time_refresh=True,\n value=\"Embedding Model\",\n ),\n HandleInput(\n name=\"embedding\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ),\n DropdownInput(\n name=\"metric\",\n display_name=\"Metric\",\n info=\"Optional distance metric for vector comparisons in the vector store.\",\n options=[\"cosine\", \"dot_product\", \"euclidean\"],\n advanced=True,\n ),\n IntInput(\n name=\"batch_size\",\n display_name=\"Batch Size\",\n info=\"Optional number of data to process in a single batch.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_batch_concurrency\",\n display_name=\"Bulk Insert Batch Concurrency\",\n info=\"Optional concurrency level for bulk insert operations.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_overwrite_concurrency\",\n display_name=\"Bulk Insert Overwrite Concurrency\",\n info=\"Optional concurrency level for bulk insert operations that overwrite existing data.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_delete_concurrency\",\n display_name=\"Bulk Delete Concurrency\",\n info=\"Optional concurrency level for bulk delete operations.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"setup_mode\",\n display_name=\"Setup Mode\",\n info=\"Configuration mode for setting up the vector store, with options like 'Sync', 'Async', or 'Off'.\",\n options=[\"Sync\", \"Async\", \"Off\"],\n advanced=True,\n value=\"Sync\",\n ),\n BoolInput(\n name=\"pre_delete_collection\",\n display_name=\"Pre Delete Collection\",\n info=\"Boolean flag to determine whether to delete the collection before creating a new one.\",\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_include\",\n display_name=\"Metadata Indexing Include\",\n info=\"Optional list of metadata fields to include in the indexing.\",\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_exclude\",\n display_name=\"Metadata Indexing Exclude\",\n info=\"Optional list of metadata fields to exclude from the indexing.\",\n advanced=True,\n ),\n StrInput(\n name=\"collection_indexing_policy\",\n display_name=\"Collection Indexing Policy\",\n info=\"Optional dictionary defining the indexing policy for the collection.\",\n advanced=True,\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Results\",\n info=\"Number of results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. (when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n DictInput(\n name=\"search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n is_list=True,\n ),\n ]\n\n @check_cached_vector_store\n def insert_in_dict(self, build_config, field_name, new_parameters):\n # Insert the new key-value pair after the found key\n for new_field_name, new_parameter in new_parameters.items():\n # Get all the items as a list of tuples (key, value)\n items = list(build_config.items())\n\n # Find the index of the key to insert after\n for i, (key, value) in enumerate(items):\n if key == field_name:\n break\n\n items.insert(i + 1, (new_field_name, new_parameter))\n\n # Clear the original dictionary and update with the modified items\n build_config.clear()\n build_config.update(items)\n\n return build_config\n\n def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n if field_name == \"embedding_service\":\n if field_value == \"Astra Vectorize\":\n for field in [\"embedding\"]:\n if field in build_config:\n del build_config[field]\n\n new_parameter = DropdownInput(\n name=\"provider\",\n display_name=\"Vectorize Provider\",\n options=self.VECTORIZE_PROVIDERS_MAPPING.keys(),\n value=\"\",\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_service\", {\"provider\": new_parameter})\n else:\n for field in [\n \"provider\",\n \"z_00_model_name\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if field in build_config:\n del build_config[field]\n\n new_parameter = HandleInput(\n name=\"embedding\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_service\", {\"embedding\": new_parameter})\n\n elif field_name == \"provider\":\n for field in [\n \"z_00_model_name\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if field in build_config:\n del build_config[field]\n\n model_options = self.VECTORIZE_PROVIDERS_MAPPING[field_value][1]\n\n new_parameter_0 = DropdownInput(\n name=\"z_00_model_name\",\n display_name=\"Model Name\",\n info=f\"The embedding model to use for the selected provider. Each provider has a different set of models \"\n f\"available (full list at https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\\n\\n{', '.join(model_options)}\",\n options=model_options,\n required=True,\n ).to_dict()\n\n new_parameter_1 = DictInput(\n name=\"z_01_model_parameters\",\n display_name=\"Model Parameters\",\n is_list=True,\n ).to_dict()\n\n new_parameter_2 = MessageTextInput(\n name=\"z_02_api_key_name\",\n display_name=\"API Key name\",\n info=\"The name of the embeddings provider API key stored on Astra. If set, it will override the 'ProviderKey' in the authentication parameters.\",\n ).to_dict()\n\n new_parameter_3 = SecretStrInput(\n name=\"z_03_provider_api_key\",\n display_name=\"Provider API Key\",\n info=\"An alternative to the Astra Authentication that passes an API key for the provider with each request to Astra DB. This may be used when Vectorize is configured for the collection, but no corresponding provider secret is stored within Astra's key management system.\",\n ).to_dict()\n\n new_parameter_4 = DictInput(\n name=\"z_04_authentication\",\n display_name=\"Authentication parameters\",\n is_list=True,\n ).to_dict()\n\n self.insert_in_dict(\n build_config,\n \"provider\",\n {\n \"z_00_model_name\": new_parameter_0,\n \"z_01_model_parameters\": new_parameter_1,\n \"z_02_api_key_name\": new_parameter_2,\n \"z_03_provider_api_key\": new_parameter_3,\n \"z_04_authentication\": new_parameter_4,\n },\n )\n\n return build_config\n\n def build_vectorize_options(self, **kwargs):\n for attribute in [\n \"provider\",\n \"z_00_api_key_name\",\n \"z_01_model_name\",\n \"z_02_authentication\",\n \"z_03_provider_api_key\",\n \"z_04_model_parameters\",\n ]:\n if not hasattr(self, attribute):\n setattr(self, attribute, None)\n\n # Fetch values from kwargs if any self.* attributes are None\n provider_value = self.VECTORIZE_PROVIDERS_MAPPING.get(self.provider, [None])[0] or kwargs.get(\"provider\")\n authentication = {**(self.z_02_authentication or kwargs.get(\"z_02_authentication\", {}))}\n\n api_key_name = self.z_00_api_key_name or kwargs.get(\"z_00_api_key_name\")\n provider_key_name = self.z_03_provider_api_key or kwargs.get(\"z_03_provider_api_key\")\n if provider_key_name:\n authentication[\"providerKey\"] = provider_key_name\n if api_key_name:\n authentication[\"providerKey\"] = api_key_name\n\n return {\n # must match astrapy.info.CollectionVectorServiceOptions\n \"collection_vector_service_options\": {\n \"provider\": provider_value,\n \"modelName\": self.z_01_model_name or kwargs.get(\"z_01_model_name\"),\n \"authentication\": authentication,\n \"parameters\": self.z_04_model_parameters or kwargs.get(\"z_04_model_parameters\", {}),\n },\n \"collection_embedding_api_key\": self.z_03_provider_api_key or kwargs.get(\"z_03_provider_api_key\"),\n }\n\n @check_cached_vector_store\n def build_vector_store(self, vectorize_options=None):\n try:\n from langchain_astradb import AstraDBVectorStore\n from langchain_astradb.utils.astradb import SetupMode\n except ImportError:\n raise ImportError(\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n\n try:\n if not self.setup_mode:\n self.setup_mode = self._inputs[\"setup_mode\"].options[0]\n\n setup_mode_value = SetupMode[self.setup_mode.upper()]\n except KeyError:\n raise ValueError(f\"Invalid setup mode: {self.setup_mode}\")\n\n if self.embedding:\n embedding_dict = {\"embedding\": self.embedding}\n else:\n from astrapy.info import CollectionVectorServiceOptions\n\n dict_options = vectorize_options or self.build_vectorize_options()\n dict_options[\"authentication\"] = {\n k: v for k, v in dict_options.get(\"authentication\", {}).items() if k and v\n }\n dict_options[\"parameters\"] = {k: v for k, v in dict_options.get(\"parameters\", {}).items() if k and v}\n\n embedding_dict = {\n \"collection_vector_service_options\": CollectionVectorServiceOptions.from_dict(\n dict_options.get(\"collection_vector_service_options\", {})\n ),\n }\n\n vector_store_kwargs = {\n **embedding_dict,\n \"collection_name\": self.collection_name,\n \"token\": self.token,\n \"api_endpoint\": self.api_endpoint,\n \"namespace\": self.namespace or None,\n \"metric\": self.metric or None,\n \"batch_size\": self.batch_size or None,\n \"bulk_insert_batch_concurrency\": self.bulk_insert_batch_concurrency or None,\n \"bulk_insert_overwrite_concurrency\": self.bulk_insert_overwrite_concurrency or None,\n \"bulk_delete_concurrency\": self.bulk_delete_concurrency or None,\n \"setup_mode\": setup_mode_value,\n \"pre_delete_collection\": self.pre_delete_collection or False,\n }\n\n if self.metadata_indexing_include:\n vector_store_kwargs[\"metadata_indexing_include\"] = self.metadata_indexing_include\n elif self.metadata_indexing_exclude:\n vector_store_kwargs[\"metadata_indexing_exclude\"] = self.metadata_indexing_exclude\n elif self.collection_indexing_policy:\n vector_store_kwargs[\"collection_indexing_policy\"] = self.collection_indexing_policy\n\n try:\n vector_store = AstraDBVectorStore(**vector_store_kwargs)\n except Exception as e:\n raise ValueError(f\"Error initializing AstraDBVectorStore: {str(e)}\") from e\n\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store):\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n raise ValueError(\"Vector Store Inputs must be Data objects.\")\n\n if documents:\n logger.debug(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n raise ValueError(f\"Error adding documents to AstraDBVectorStore: {str(e)}\") from e\n else:\n logger.debug(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self):\n if self.search_type == \"Similarity with score threshold\":\n return \"similarity_score_threshold\"\n elif self.search_type == \"MMR (Max Marginal Relevance)\":\n return \"mmr\"\n else:\n return \"similarity\"\n\n def _build_search_args(self):\n args = {\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n\n if self.search_filter:\n clean_filter = {k: v for k, v in self.search_filter.items() if k and v}\n if len(clean_filter) > 0:\n args[\"filter\"] = clean_filter\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n if not vector_store:\n vector_store = self.build_vector_store()\n\n logger.debug(f\"Search input: {self.search_input}\")\n logger.debug(f\"Search type: {self.search_type}\")\n logger.debug(f\"Number of results: {self.number_of_results}\")\n\n if self.search_input and isinstance(self.search_input, str) and self.search_input.strip():\n try:\n search_type = self._map_search_type()\n search_args = self._build_search_args()\n\n docs = vector_store.search(query=self.search_input, search_type=search_type, **search_args)\n except Exception as e:\n raise ValueError(f\"Error performing search in AstraDBVectorStore: {str(e)}\") from e\n\n logger.debug(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n logger.debug(f\"Converted documents to data: {len(data)}\")\n self.status = data\n return data\n else:\n logger.debug(\"No search input provided. Skipping search.\")\n return []\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" + "value": "import os\n\nfrom loguru import logger\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import DictInput, FloatInput, MessageTextInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n HandleInput,\n IntInput,\n MultilineInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema import Data\n\n\nclass AstraVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Implementation of Vector Store using Astra DB with search capabilities\"\n documentation: str = \"https://python.langchain.com/docs/integrations/vectorstores/astradb\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n VECTORIZE_PROVIDERS_MAPPING = {\n \"Azure OpenAI\": [\"azureOpenAI\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Hugging Face - Dedicated\": [\"huggingfaceDedicated\", [\"endpoint-defined-model\"]],\n \"Hugging Face - Serverless\": [\n \"huggingface\",\n [\n \"sentence-transformers/all-MiniLM-L6-v2\",\n \"intfloat/multilingual-e5-large\",\n \"intfloat/multilingual-e5-large-instruct\",\n \"BAAI/bge-small-en-v1.5\",\n \"BAAI/bge-base-en-v1.5\",\n \"BAAI/bge-large-en-v1.5\",\n ],\n ],\n \"Jina AI\": [\n \"jinaAI\",\n [\n \"jina-embeddings-v2-base-en\",\n \"jina-embeddings-v2-base-de\",\n \"jina-embeddings-v2-base-es\",\n \"jina-embeddings-v2-base-code\",\n \"jina-embeddings-v2-base-zh\",\n ],\n ],\n \"Mistral AI\": [\"mistral\", [\"mistral-embed\"]],\n \"NVIDIA\": [\"nvidia\", [\"NV-Embed-QA\"]],\n \"OpenAI\": [\"openai\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Upstage\": [\"upstageAI\", [\"solar-embedding-1-large\"]],\n \"Voyage AI\": [\n \"voyageAI\",\n [\"voyage-large-2-instruct\", \"voyage-law-2\", \"voyage-code-2\", \"voyage-large-2\", \"voyage-2\"],\n ],\n }\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n advanced=os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\",\n ),\n SecretStrInput(\n name=\"api_endpoint\",\n display_name=\"API Endpoint\",\n info=\"API endpoint URL for the Astra DB service.\",\n value=\"ASTRA_DB_API_ENDPOINT\",\n required=True,\n ),\n StrInput(\n name=\"collection_name\",\n display_name=\"Collection Name\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n ),\n MultilineInput(\n name=\"search_input\",\n display_name=\"Search Input\",\n ),\n DataInput(\n name=\"ingest_data\",\n display_name=\"Ingest Data\",\n is_list=True,\n ),\n StrInput(\n name=\"namespace\",\n display_name=\"Namespace\",\n info=\"Optional namespace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"embedding_service\",\n display_name=\"Embedding Model or Astra Vectorize\",\n info=\"Determines whether to use Astra Vectorize for the collection.\",\n options=[\"Embedding Model\", \"Astra Vectorize\"],\n real_time_refresh=True,\n value=\"Embedding Model\",\n ),\n HandleInput(\n name=\"embedding\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ),\n DropdownInput(\n name=\"metric\",\n display_name=\"Metric\",\n info=\"Optional distance metric for vector comparisons in the vector store.\",\n options=[\"cosine\", \"dot_product\", \"euclidean\"],\n advanced=True,\n ),\n IntInput(\n name=\"batch_size\",\n display_name=\"Batch Size\",\n info=\"Optional number of data to process in a single batch.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_batch_concurrency\",\n display_name=\"Bulk Insert Batch Concurrency\",\n info=\"Optional concurrency level for bulk insert operations.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_overwrite_concurrency\",\n display_name=\"Bulk Insert Overwrite Concurrency\",\n info=\"Optional concurrency level for bulk insert operations that overwrite existing data.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_delete_concurrency\",\n display_name=\"Bulk Delete Concurrency\",\n info=\"Optional concurrency level for bulk delete operations.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"setup_mode\",\n display_name=\"Setup Mode\",\n info=\"Configuration mode for setting up the vector store, with options like 'Sync', 'Async', or 'Off'.\",\n options=[\"Sync\", \"Async\", \"Off\"],\n advanced=True,\n value=\"Sync\",\n ),\n BoolInput(\n name=\"pre_delete_collection\",\n display_name=\"Pre Delete Collection\",\n info=\"Boolean flag to determine whether to delete the collection before creating a new one.\",\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_include\",\n display_name=\"Metadata Indexing Include\",\n info=\"Optional list of metadata fields to include in the indexing.\",\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_exclude\",\n display_name=\"Metadata Indexing Exclude\",\n info=\"Optional list of metadata fields to exclude from the indexing.\",\n advanced=True,\n ),\n StrInput(\n name=\"collection_indexing_policy\",\n display_name=\"Collection Indexing Policy\",\n info=\"Optional dictionary defining the indexing policy for the collection.\",\n advanced=True,\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Results\",\n info=\"Number of results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. (when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n DictInput(\n name=\"search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n is_list=True,\n ),\n ]\n\n @check_cached_vector_store\n def insert_in_dict(self, build_config, field_name, new_parameters):\n # Insert the new key-value pair after the found key\n for new_field_name, new_parameter in new_parameters.items():\n # Get all the items as a list of tuples (key, value)\n items = list(build_config.items())\n\n # Find the index of the key to insert after\n for i, (key, value) in enumerate(items):\n if key == field_name:\n break\n\n items.insert(i + 1, (new_field_name, new_parameter))\n\n # Clear the original dictionary and update with the modified items\n build_config.clear()\n build_config.update(items)\n\n return build_config\n\n def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n if field_name == \"embedding_service\":\n if field_value == \"Astra Vectorize\":\n for field in [\"embedding\"]:\n if field in build_config:\n del build_config[field]\n\n new_parameter = DropdownInput(\n name=\"provider\",\n display_name=\"Vectorize Provider\",\n options=self.VECTORIZE_PROVIDERS_MAPPING.keys(),\n value=\"\",\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_service\", {\"provider\": new_parameter})\n else:\n for field in [\n \"provider\",\n \"z_00_model_name\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if field in build_config:\n del build_config[field]\n\n new_parameter = HandleInput(\n name=\"embedding\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_service\", {\"embedding\": new_parameter})\n\n elif field_name == \"provider\":\n for field in [\n \"z_00_model_name\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if field in build_config:\n del build_config[field]\n\n model_options = self.VECTORIZE_PROVIDERS_MAPPING[field_value][1]\n\n new_parameter_0 = DropdownInput(\n name=\"z_00_model_name\",\n display_name=\"Model Name\",\n info=f\"The embedding model to use for the selected provider. Each provider has a different set of models \"\n f\"available (full list at https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\\n\\n{', '.join(model_options)}\",\n options=model_options,\n required=True,\n ).to_dict()\n\n new_parameter_1 = DictInput(\n name=\"z_01_model_parameters\",\n display_name=\"Model Parameters\",\n is_list=True,\n ).to_dict()\n\n new_parameter_2 = MessageTextInput(\n name=\"z_02_api_key_name\",\n display_name=\"API Key name\",\n info=\"The name of the embeddings provider API key stored on Astra. If set, it will override the 'ProviderKey' in the authentication parameters.\",\n ).to_dict()\n\n new_parameter_3 = SecretStrInput(\n name=\"z_03_provider_api_key\",\n display_name=\"Provider API Key\",\n info=\"An alternative to the Astra Authentication that passes an API key for the provider with each request to Astra DB. This may be used when Vectorize is configured for the collection, but no corresponding provider secret is stored within Astra's key management system.\",\n ).to_dict()\n\n new_parameter_4 = DictInput(\n name=\"z_04_authentication\",\n display_name=\"Authentication parameters\",\n is_list=True,\n ).to_dict()\n\n self.insert_in_dict(\n build_config,\n \"provider\",\n {\n \"z_00_model_name\": new_parameter_0,\n \"z_01_model_parameters\": new_parameter_1,\n \"z_02_api_key_name\": new_parameter_2,\n \"z_03_provider_api_key\": new_parameter_3,\n \"z_04_authentication\": new_parameter_4,\n },\n )\n\n return build_config\n\n def build_vectorize_options(self, **kwargs):\n for attribute in [\n \"provider\",\n \"z_00_model_name\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if not hasattr(self, attribute):\n setattr(self, attribute, None)\n\n # Fetch values from kwargs if any self.* attributes are None\n provider_value = self.VECTORIZE_PROVIDERS_MAPPING.get(self.provider, [None])[0] or kwargs.get(\"provider\")\n authentication = {**(self.z_04_authentication or kwargs.get(\"z_04_authentication\", {}))}\n\n api_key_name = self.z_02_api_key_name or kwargs.get(\"z_02_api_key_name\")\n provider_key_name = self.z_03_provider_api_key or kwargs.get(\"z_03_provider_api_key\")\n if provider_key_name:\n authentication[\"providerKey\"] = provider_key_name\n if api_key_name:\n authentication[\"providerKey\"] = api_key_name\n\n return {\n # must match astrapy.info.CollectionVectorServiceOptions\n \"collection_vector_service_options\": {\n \"provider\": provider_value,\n \"modelName\": self.z_00_model_name or kwargs.get(\"z_00_model_name\"),\n \"authentication\": authentication,\n \"parameters\": self.z_01_model_parameters or kwargs.get(\"z_01_model_parameters\", {}),\n },\n \"collection_embedding_api_key\": self.z_03_provider_api_key or kwargs.get(\"z_03_provider_api_key\"),\n }\n\n @check_cached_vector_store\n def build_vector_store(self, vectorize_options=None):\n try:\n from langchain_astradb import AstraDBVectorStore\n from langchain_astradb.utils.astradb import SetupMode\n except ImportError:\n raise ImportError(\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n\n try:\n if not self.setup_mode:\n self.setup_mode = self._inputs[\"setup_mode\"].options[0]\n\n setup_mode_value = SetupMode[self.setup_mode.upper()]\n except KeyError:\n raise ValueError(f\"Invalid setup mode: {self.setup_mode}\")\n\n if self.embedding:\n embedding_dict = {\"embedding\": self.embedding}\n else:\n from astrapy.info import CollectionVectorServiceOptions\n\n dict_options = vectorize_options or self.build_vectorize_options()\n dict_options[\"authentication\"] = {\n k: v for k, v in dict_options.get(\"authentication\", {}).items() if k and v\n }\n dict_options[\"parameters\"] = {k: v for k, v in dict_options.get(\"parameters\", {}).items() if k and v}\n\n embedding_dict = {\n \"collection_vector_service_options\": CollectionVectorServiceOptions.from_dict(\n dict_options.get(\"collection_vector_service_options\", {})\n ),\n }\n\n vector_store_kwargs = {\n **embedding_dict,\n \"collection_name\": self.collection_name,\n \"token\": self.token,\n \"api_endpoint\": self.api_endpoint,\n \"namespace\": self.namespace or None,\n \"metric\": self.metric or None,\n \"batch_size\": self.batch_size or None,\n \"bulk_insert_batch_concurrency\": self.bulk_insert_batch_concurrency or None,\n \"bulk_insert_overwrite_concurrency\": self.bulk_insert_overwrite_concurrency or None,\n \"bulk_delete_concurrency\": self.bulk_delete_concurrency or None,\n \"setup_mode\": setup_mode_value,\n \"pre_delete_collection\": self.pre_delete_collection or False,\n }\n\n if self.metadata_indexing_include:\n vector_store_kwargs[\"metadata_indexing_include\"] = self.metadata_indexing_include\n elif self.metadata_indexing_exclude:\n vector_store_kwargs[\"metadata_indexing_exclude\"] = self.metadata_indexing_exclude\n elif self.collection_indexing_policy:\n vector_store_kwargs[\"collection_indexing_policy\"] = self.collection_indexing_policy\n\n try:\n vector_store = AstraDBVectorStore(**vector_store_kwargs)\n except Exception as e:\n raise ValueError(f\"Error initializing AstraDBVectorStore: {str(e)}\") from e\n\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store):\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n raise ValueError(\"Vector Store Inputs must be Data objects.\")\n\n if documents:\n logger.debug(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n raise ValueError(f\"Error adding documents to AstraDBVectorStore: {str(e)}\") from e\n else:\n logger.debug(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self):\n if self.search_type == \"Similarity with score threshold\":\n return \"similarity_score_threshold\"\n elif self.search_type == \"MMR (Max Marginal Relevance)\":\n return \"mmr\"\n else:\n return \"similarity\"\n\n def _build_search_args(self):\n args = {\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n\n if self.search_filter:\n clean_filter = {k: v for k, v in self.search_filter.items() if k and v}\n if len(clean_filter) > 0:\n args[\"filter\"] = clean_filter\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n if not vector_store:\n vector_store = self.build_vector_store()\n\n logger.debug(f\"Search input: {self.search_input}\")\n logger.debug(f\"Search type: {self.search_type}\")\n logger.debug(f\"Number of results: {self.number_of_results}\")\n\n if self.search_input and isinstance(self.search_input, str) and self.search_input.strip():\n try:\n search_type = self._map_search_type()\n search_args = self._build_search_args()\n\n docs = vector_store.search(query=self.search_input, search_type=search_type, **search_args)\n except Exception as e:\n raise ValueError(f\"Error performing search in AstraDBVectorStore: {str(e)}\") from e\n\n logger.debug(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n logger.debug(f\"Converted documents to data: {len(data)}\")\n self.status = data\n return data\n else:\n logger.debug(\"No search input provided. Skipping search.\")\n return []\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" }, "collection_indexing_policy": { "advanced": true, diff --git a/src/backend/base/pyproject.toml b/src/backend/base/pyproject.toml index a0a234816c98..789211e22568 100644 --- a/src/backend/base/pyproject.toml +++ b/src/backend/base/pyproject.toml @@ -166,7 +166,7 @@ readme = "README.md" dependencies = [ "fastapi>=0.111.0", - "httpx", + "httpx>=0.27", "uvicorn>=0.30.0", "gunicorn>=22.0.0", "langchain~=0.2.0", @@ -178,7 +178,6 @@ dependencies = [ "langchain-experimental>=0.0.61", "pydantic>=2.7.0", "pydantic-settings>=2.2.0", - "websockets", "typer>=0.12.0", "cachetools>=5.3.1", "platformdirs>=4.2.0", @@ -224,22 +223,22 @@ dependencies = [ # Optional dependencies for uv [project.optional-dependencies] deploy = [ - "celery", - "redis", - "flower" + "celery>=5.3.1", + "redis>=4.6.0", + "flower>=1.0.0" ] local = [ - "llama-cpp-python", - "sentence-transformers", - "ctransformers" + "llama-cpp-python>=0.2.0", + "sentence-transformers>=2.0.0", + "ctransformers>=0.2" ] all = [ - "celery", - "redis", - "flower", - "llama-cpp-python", - "sentence-transformers", - "ctransformers" + "celery>=5.3.1", + "redis>=4.6.0", + "flower>=1.0.0", + "llama-cpp-python>=0.2.0", + "sentence-transformers>=2.0.0", + "ctransformers>=0.2" ] # Development dependencies @@ -248,7 +247,7 @@ dev = [ "ipykernel>=6.29.0", "mypy>=1.11.0", "ruff>=0.4.5", - "httpx", + "httpx>=0.27", "pytest>=8.2.0", "types-requests>=2.32.0", "requests>=2.32.0", diff --git a/uv.lock b/uv.lock index d4e4f68b719b..2486ca860cfc 100644 --- a/uv.lock +++ b/uv.lock @@ -335,14 +335,14 @@ wheels = [ [[package]] name = "authlib" -version = "1.3.2" +version = "1.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cryptography" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f3/75/47dbab150ef6f9298e227a40c93c7fed5f3ffb67c9fb62cd49f66285e46e/authlib-1.3.2.tar.gz", hash = "sha256:4b16130117f9eb82aa6eec97f6dd4673c3f960ac0283ccdae2897ee4bc030ba2", size = 147313 } +sdist = { url = "https://files.pythonhosted.org/packages/09/47/df70ecd34fbf86d69833fe4e25bb9ecbaab995c8e49df726dd416f6bb822/authlib-1.3.1.tar.gz", hash = "sha256:7ae843f03c06c5c0debd63c9db91f9fda64fa62a42a77419fa15fbb7e7a58917", size = 146074 } wheels = [ - { url = "https://files.pythonhosted.org/packages/df/4c/9aa0416a403d5cc80292cb030bcd2c918cce2755e314d8c1aa18656e1e12/Authlib-1.3.2-py2.py3-none-any.whl", hash = "sha256:ede026a95e9f5cdc2d4364a52103f5405e75aa156357e831ef2bfd0bc5094dfc", size = 225111 }, + { url = "https://files.pythonhosted.org/packages/87/1f/bc95e43ffb57c05b8efcc376dd55a0240bf58f47ddf5a0f92452b6457b75/Authlib-1.3.1-py2.py3-none-any.whl", hash = "sha256:d35800b973099bbadc49b42b256ecb80041ad56b7fe1216a362c7943c088f377", size = 223827 }, ] [[package]] @@ -2321,6 +2321,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/66/2b/a6e68d7ea6f4fbc31cce20e354d6cef484da0a9891ee6a3eaf3aa9659d01/grpcio-1.66.1-cp312-cp312-win_amd64.whl", hash = "sha256:b0aa03d240b5539648d996cc60438f128c7f46050989e35b25f5c18286c86734", size = 4275565 }, ] +[[package]] +name = "grpcio-health-checking" +version = "1.62.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "grpcio" }, + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/eb/9f/09df9b02fc8eafa3031d878c8a4674a0311293c8c6f1c942cdaeec204126/grpcio-health-checking-1.62.3.tar.gz", hash = "sha256:5074ba0ce8f0dcfe328408ec5c7551b2a835720ffd9b69dade7fa3e0dc1c7a93", size = 15640 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/40/4c/ee3173906196b741ac6ba55a9788ba9ebf2cd05f91715a49b6c3bfbb9d73/grpcio_health_checking-1.62.3-py3-none-any.whl", hash = "sha256:f29da7dd144d73b4465fe48f011a91453e9ff6c8af0d449254cf80021cab3e0d", size = 18547 }, +] + [[package]] name = "grpcio-status" version = "1.62.3" @@ -2484,7 +2497,7 @@ wheels = [ [[package]] name = "httpx" -version = "0.27.2" +version = "0.27.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -2493,9 +2506,9 @@ dependencies = [ { name = "idna" }, { name = "sniffio" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/78/82/08f8c936781f67d9e6b9eeb8a0c8b4e406136ea4c3d1f89a5db71d42e0e6/httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2", size = 144189 } +sdist = { url = "https://files.pythonhosted.org/packages/5c/2d/3da5bdf4408b8b2800061c339f240c1802f2e82d55e50bd39c5a881f47f0/httpx-0.27.0.tar.gz", hash = "sha256:a0cb88a46f32dc874e04ee956e4c2764aba2aa228f650b06788ba6bda2962ab5", size = 126413 } wheels = [ - { url = "https://files.pythonhosted.org/packages/56/95/9377bcb415797e44274b51d46e3249eba641711cf3348050f76ee7b15ffc/httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0", size = 76395 }, + { url = "https://files.pythonhosted.org/packages/41/7b/ddacf6dcebb42466abd03f368782142baa82e08fc0c1f8eaa05b4bae87d5/httpx-0.27.0-py3-none-any.whl", hash = "sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5", size = 75590 }, ] [package.optional-dependencies] @@ -3528,7 +3541,7 @@ requires-dist = [ { name = "celery", extras = ["redis"], marker = "extra == 'deploy'", specifier = ">=5.3.6" }, { name = "certifi", specifier = ">=2023.11.17,<2025.0.0" }, { name = "chromadb", specifier = ">=0.4" }, - { name = "clickhouse-connect", extras = ["clickhouse-connect"], marker = "extra == 'clickhouse-connect'", specifier = "==0.7.19" }, + { name = "clickhouse-connect", marker = "extra == 'clickhouse-connect'", specifier = "==0.7.19" }, { name = "cohere", specifier = ">=5.5.3" }, { name = "composio-langchain", specifier = "==0.5.9" }, { name = "couchbase", marker = "extra == 'couchbase'", specifier = ">=4.2.1" }, @@ -3597,7 +3610,7 @@ requires-dist = [ { name = "supabase", specifier = ">=2.3.0" }, { name = "types-cachetools", specifier = ">=5.3.0.5" }, { name = "upstash-vector", specifier = ">=0.5.0" }, - { name = "weaviate-client" }, + { name = "weaviate-client", specifier = ">=4.8" }, { name = "wikipedia", specifier = ">=1.4.0" }, { name = "wolframalpha", specifier = ">=5.1.3" }, { name = "yfinance", specifier = ">=0.2.40" }, @@ -3608,7 +3621,7 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ { name = "dictdiffer", specifier = ">=0.9.0" }, - { name = "httpx" }, + { name = "httpx", specifier = ">=0.27.0" }, { name = "ipykernel", specifier = ">=6.29.0" }, { name = "mypy", specifier = ">=1.11.0" }, { name = "packaging", specifier = ">=23.2,<24.0" }, @@ -3697,7 +3710,6 @@ dependencies = [ { name = "typer" }, { name = "uncurl" }, { name = "uvicorn" }, - { name = "websockets" }, ] [package.optional-dependencies] @@ -3759,14 +3771,14 @@ requires-dist = [ { name = "asyncer", specifier = ">=0.0.5" }, { name = "bcrypt", specifier = "==4.0.1" }, { name = "cachetools", specifier = ">=5.3.1" }, - { name = "celery", marker = "extra == 'all'" }, - { name = "celery", marker = "extra == 'deploy'" }, + { name = "celery", marker = "extra == 'all'", specifier = ">=5.3.1" }, + { name = "celery", marker = "extra == 'deploy'", specifier = ">=5.3.1" }, { name = "chardet", specifier = ">=5.2.0" }, { name = "clickhouse-connect", specifier = "==0.7.19" }, { name = "crewai", specifier = ">=0.36.0" }, { name = "cryptography", specifier = ">=42.0.5,<44.0.0" }, - { name = "ctransformers", marker = "extra == 'all'" }, - { name = "ctransformers", marker = "extra == 'local'" }, + { name = "ctransformers", marker = "extra == 'all'", specifier = ">=0.2" }, + { name = "ctransformers", marker = "extra == 'local'", specifier = ">=0.2" }, { name = "devtools", marker = "extra == 'dev'", specifier = ">=0.12.2" }, { name = "dictdiffer", marker = "extra == 'dev'", specifier = ">=0.9.0" }, { name = "diskcache", specifier = ">=5.6.3" }, @@ -3776,20 +3788,20 @@ requires-dist = [ { name = "fastapi", specifier = ">=0.111.0" }, { name = "filelock", specifier = ">=3.15.4" }, { name = "firecrawl-py", specifier = ">=0.0.16" }, - { name = "flower", marker = "extra == 'all'" }, - { name = "flower", marker = "extra == 'deploy'" }, + { name = "flower", marker = "extra == 'all'", specifier = ">=1.0.0" }, + { name = "flower", marker = "extra == 'deploy'", specifier = ">=1.0.0" }, { name = "grandalf", specifier = ">=0.8.0" }, { name = "gunicorn", specifier = ">=22.0.0" }, - { name = "httpx" }, - { name = "httpx", marker = "extra == 'dev'" }, + { name = "httpx", specifier = ">=0.27" }, + { name = "httpx", marker = "extra == 'dev'", specifier = ">=0.27" }, { name = "ipykernel", marker = "extra == 'dev'", specifier = ">=6.29.0" }, { name = "jq", marker = "sys_platform != 'win32'", specifier = ">=1.7.0" }, { name = "langchain", specifier = "~=0.2.0" }, { name = "langchain-core", specifier = ">=0.2.32" }, { name = "langchain-experimental", specifier = ">=0.0.61" }, { name = "langchainhub", specifier = "~=0.1.15" }, - { name = "llama-cpp-python", marker = "extra == 'all'" }, - { name = "llama-cpp-python", marker = "extra == 'local'" }, + { name = "llama-cpp-python", marker = "extra == 'all'", specifier = ">=0.2.0" }, + { name = "llama-cpp-python", marker = "extra == 'local'", specifier = ">=0.2.0" }, { name = "loguru", specifier = ">=0.7.1" }, { name = "multiprocess", specifier = ">=0.70.14" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.11.0" }, @@ -3824,14 +3836,14 @@ requires-dist = [ { name = "python-docx", specifier = ">=1.1.0" }, { name = "python-jose", specifier = ">=3.3.0" }, { name = "python-multipart", specifier = ">=0.0.7" }, - { name = "redis", marker = "extra == 'all'" }, - { name = "redis", marker = "extra == 'deploy'" }, + { name = "redis", marker = "extra == 'all'", specifier = ">=4.6.0" }, + { name = "redis", marker = "extra == 'deploy'", specifier = ">=4.6.0" }, { name = "requests", marker = "extra == 'dev'", specifier = ">=2.32.0" }, { name = "respx", marker = "extra == 'dev'", specifier = ">=0.21.1" }, { name = "rich", specifier = ">=13.7.0" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.4.5" }, - { name = "sentence-transformers", marker = "extra == 'all'" }, - { name = "sentence-transformers", marker = "extra == 'local'" }, + { name = "sentence-transformers", marker = "extra == 'all'", specifier = ">=2.0.0" }, + { name = "sentence-transformers", marker = "extra == 'local'", specifier = ">=2.0.0" }, { name = "sentry-sdk", extras = ["fastapi", "loguru"], specifier = ">=2.5.1" }, { name = "setuptools", specifier = ">=70" }, { name = "spider-client", specifier = ">=0.0.27" }, @@ -3849,7 +3861,6 @@ requires-dist = [ { name = "uncurl", specifier = ">=0.0.11" }, { name = "uvicorn", specifier = ">=0.30.0" }, { name = "vulture", marker = "extra == 'dev'", specifier = ">=2.11" }, - { name = "websockets" }, ] [[package]] @@ -7876,16 +7887,21 @@ wheels = [ [[package]] name = "weaviate-client" -version = "3.26.7" +version = "4.8.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "authlib" }, + { name = "grpcio" }, + { name = "grpcio-health-checking" }, + { name = "grpcio-tools" }, + { name = "httpx" }, + { name = "pydantic" }, { name = "requests" }, { name = "validators" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f8/2e/9588bae34c1d67d05ccc07d74a4f5d73cce342b916f79ab3a9114c6607bb/weaviate_client-3.26.7.tar.gz", hash = "sha256:ea538437800abc6edba21acf213accaf8a82065584ee8b914bae4a4ad4ef6b70", size = 210480 } +sdist = { url = "https://files.pythonhosted.org/packages/4f/4d/650831937f25b8e788870b46a693a6e141d9d3d72bfd708ce88b0b01d69f/weaviate_client-4.8.1.tar.gz", hash = "sha256:2756996a2205bb991f258c064fc502011fc78a40e8786cb072208b1d3d7c9932", size = 681877 } wheels = [ - { url = "https://files.pythonhosted.org/packages/2a/95/fb326052bc1d73cb3c19fcfaf6ebb477f896af68de07eaa1337e27ee57fa/weaviate_client-3.26.7-py3-none-any.whl", hash = "sha256:48b8d4b71df881b4e5e15964d7ac339434338ccee73779e3af7eab698a92083b", size = 120051 }, + { url = "https://files.pythonhosted.org/packages/c8/d8/88610f5aaaffd3d2447fe755b86a8bb06b79472e45ec999baa5040dea9a3/weaviate_client-4.8.1-py3-none-any.whl", hash = "sha256:c16453ebfd9bd4045675f8e50841d1af21aa9af1332f379d0418c4531c03bd44", size = 374526 }, ] [[package]]