Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LangChain Community: VectorStores: Azure Cosmos DB Mongo vCore with DiskANN #27329

Merged
merged 18 commits into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
195 changes: 167 additions & 28 deletions docs/docs/integrations/vectorstores/azure_cosmos_db.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,6 @@
"name": "stdout",
"output_type": "stream",
"text": [
"\r\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.2\u001b[0m\r\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\r\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
Expand Down Expand Up @@ -74,7 +71,7 @@
"id": "f2e66b097c6ce2e3",
"metadata": {},
"source": [
"We want to use `OpenAIEmbeddings` so we need to set up our Azure OpenAI API Key alongside other environment variables. "
"We want to use `AzureOpenAIEmbeddings`, so we need to set up our Azure OpenAI API key alongside other environment variables. "
]
},
{
Expand All @@ -90,15 +87,10 @@
"outputs": [],
"source": [
"# Set up the Azure OpenAI environment variables\n",
"os.environ[\"OPENAI_API_TYPE\"] = \"azure\"\n",
"os.environ[\"OPENAI_API_VERSION\"] = \"2023-05-15\"\n",
"os.environ[\"OPENAI_API_BASE\"] = (\n",
" \"YOUR_OPEN_AI_ENDPOINT\" # https://example.openai.azure.com/\n",
")\n",
"os.environ[\"OPENAI_API_KEY\"] = \"YOUR_OPENAI_API_KEY\"\n",
"os.environ[\"OPENAI_EMBEDDINGS_DEPLOYMENT\"] = (\n",
" \"smart-agent-embedding-ada\" # the deployment name for the embedding model\n",
")\n",
"\n",
"os.environ[\"AZURE_OPENAI_API_KEY\"] = \"YOUR_AZURE_OPENAI_API_KEY\"\n",
"os.environ[\"AZURE_OPENAI_ENDPOINT\"] = \"YOUR_AZURE_OPENAI_ENDPOINT\"\n",
"os.environ[\"AZURE_OPENAI_API_VERSION\"] = \"2023-05-15\"\n",
"os.environ[\"OPENAI_EMBEDDINGS_MODEL_NAME\"] = \"text-embedding-ada-002\" # the model name"
]
},
Expand Down Expand Up @@ -130,7 +122,7 @@
" CosmosDBSimilarityType,\n",
" CosmosDBVectorSearchType,\n",
")\n",
"from langchain_openai import OpenAIEmbeddings\n",
"from langchain_openai import AzureOpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n",
"SOURCE_FILE_NAME = \"../../how_to/state_of_the_union.txt\"\n",
Expand All @@ -147,14 +139,35 @@
"model_name = os.getenv(\"OPENAI_EMBEDDINGS_MODEL_NAME\", \"text-embedding-ada-002\")\n",
"\n",
"\n",
"openai_embeddings: OpenAIEmbeddings = OpenAIEmbeddings(\n",
" deployment=model_deployment, model=model_name, chunk_size=1\n",
"openai_embeddings: AzureOpenAIEmbeddings = AzureOpenAIEmbeddings(\n",
" model=model_name, chunk_size=1\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "f6c6ed80-7b91-4833-bab5-c9b2b5edcdec",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Document(metadata={'source': '../../how_to/state_of_the_union.txt'}, page_content='Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \\n\\nLast year COVID-19 kept us apart. This year we are finally together again. \\n\\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \\n\\nWith a duty to one another to the American people to the Constitution. \\n\\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \\n\\nSix days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \\n\\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \\n\\nHe met the Ukrainian people. \\n\\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world.')"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs[0]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "39ae6058c2f7fdf1",
"metadata": {
"ExecuteTime": {
Expand All @@ -166,14 +179,10 @@
{
"data": {
"text/plain": [
"{'raw': {'defaultShard': {'numIndexesBefore': 1,\n",
" 'numIndexesAfter': 2,\n",
" 'createdCollectionAutomatically': False,\n",
" 'ok': 1}},\n",
" 'ok': 1}"
"'\\n# DiskANN vectorstore\\nmaxDegree = 40\\ndimensions = 1536\\nsimilarity_algorithm = CosmosDBSimilarityType.COS\\nkind = CosmosDBVectorSearchType.VECTOR_DISKANN\\nlBuild = 20\\n\\nvectorstore.create_index(\\n dimensions=dimensions,\\n similarity=similarity_algorithm,\\n kind=kind ,\\n max_degree=maxDegree,\\n l_build=lBuild,\\n )\\n\\n# -----------------------------------------------------------\\n\\n# HNSW vectorstore\\ndimensions = 1536\\nsimilarity_algorithm = CosmosDBSimilarityType.COS\\nkind = CosmosDBVectorSearchType.VECTOR_HNSW\\nm = 16\\nef_construction = 64\\n\\nvectorstore.create_index(\\n dimensions=dimensions,\\n similarity=similarity_algorithm,\\n kind=kind ,\\n m=m,\\n ef_construction=ef_construction,\\n )\\n'"
]
},
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -212,12 +221,46 @@
"\n",
"vectorstore.create_index(\n",
" num_lists, dimensions, similarity_algorithm, kind, m, ef_construction\n",
")"
")\n",
"\n",
"\"\"\"\n",
"# DiskANN vectorstore\n",
"maxDegree = 40\n",
"dimensions = 1536\n",
"similarity_algorithm = CosmosDBSimilarityType.COS\n",
"kind = CosmosDBVectorSearchType.VECTOR_DISKANN\n",
"lBuild = 20\n",
"\n",
"vectorstore.create_index(\n",
" dimensions=dimensions,\n",
" similarity=similarity_algorithm,\n",
" kind=kind ,\n",
" max_degree=maxDegree,\n",
" l_build=lBuild,\n",
" )\n",
"\n",
"# -----------------------------------------------------------\n",
"\n",
"# HNSW vectorstore\n",
"dimensions = 1536\n",
"similarity_algorithm = CosmosDBSimilarityType.COS\n",
"kind = CosmosDBVectorSearchType.VECTOR_HNSW\n",
"m = 16\n",
"ef_construction = 64\n",
"\n",
"vectorstore.create_index(\n",
" dimensions=dimensions,\n",
" similarity=similarity_algorithm,\n",
" kind=kind ,\n",
" m=m,\n",
" ef_construction=ef_construction,\n",
" )\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"id": "32c68d3246adc21f",
"metadata": {
"ExecuteTime": {
Expand All @@ -234,7 +277,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"id": "8feeeb4364efb204",
"metadata": {
"ExecuteTime": {
Expand Down Expand Up @@ -271,7 +314,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"id": "3c218ab6f59301f7",
"metadata": {
"ExecuteTime": {
Expand Down Expand Up @@ -308,7 +351,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"id": "fd67e4d92c9ab32f",
"metadata": {
"ExecuteTime": {
Expand Down Expand Up @@ -352,10 +395,106 @@
"Azure Cosmos DB for MongoDB supports pre-filtering with $lt, $lte, $eq, $ne, $gte, $gt, $in, $nin, and $regex. To use this feature, enable \"filtering vector search\" in the \"Preview Features\" tab of your Azure Subscription. Learn more about preview features [here](https://learn.microsoft.com/azure/cosmos-db/mongodb/vcore/vector-search#filtered-vector-search-preview)."
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "19c43de6-47f9-45f0-a422-8d852a5d191f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'raw': {'defaultShard': {'numIndexesBefore': 3,\n",
" 'numIndexesAfter': 4,\n",
" 'createdCollectionAutomatically': False,\n",
" 'ok': 1}},\n",
" 'ok': 1}"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# create a filter index\n",
"vectorstore.create_filter_index(\n",
" property_to_filter=\"metadata.source\", index_name=\"filter_index\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "c7031279-dfb8-43f2-a7a8-d10a3786023b",
"metadata": {},
"outputs": [],
"source": [
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs = vectorstore.similarity_search(\n",
" query, pre_filter={\"metadata.source\": {\"$ne\": \"filter content\"}}\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "3860be72-d293-43b9-a727-425f166ff6c6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(docs)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "b7fb9800-b1cf-4315-af9d-e8c572d3e05f",
"metadata": {},
"outputs": [],
"source": [
"docs = vectorstore.similarity_search(\n",
" query,\n",
" pre_filter={\"metadata.source\": {\"$ne\": \"../../how_to/state_of_the_union.txt\"}},\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "dba9d39e-6220-4fad-84fa-e123aa7ca6e4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(docs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "50bb4346",
"id": "25ea7250-6e8f-48e6-aac9-196effbdc8d8",
"metadata": {},
"outputs": [],
"source": []
Expand Down
Loading
Loading