From 95df62b92dc29db922b14adc7dc4bbf96477e4ec Mon Sep 17 00:00:00 2001 From: Li Jiang Date: Mon, 23 Sep 2024 09:43:53 +0000 Subject: [PATCH 1/8] Fix chromadb error type --- autogen/agentchat/contrib/vectordb/chromadb.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/autogen/agentchat/contrib/vectordb/chromadb.py b/autogen/agentchat/contrib/vectordb/chromadb.py index 1ed8708409d3..f02c6379fc06 100644 --- a/autogen/agentchat/contrib/vectordb/chromadb.py +++ b/autogen/agentchat/contrib/vectordb/chromadb.py @@ -1,6 +1,8 @@ import os from typing import Callable, List +import chromadb.errors + from .base import Document, ItemID, QueryResults, VectorDB from .utils import chroma_results_to_query_results, filter_results_by_distance, get_logger @@ -84,7 +86,7 @@ def create_collection( collection = self.active_collection else: collection = self.client.get_collection(collection_name, embedding_function=self.embedding_function) - except ValueError: + except (ValueError, chromadb.errors.ChromaError): collection = None if collection is None: return self.client.create_collection( From 5ca654f8377f82eae21c85d03af9677f06e92c99 Mon Sep 17 00:00:00 2001 From: Li Jiang Date: Mon, 23 Sep 2024 10:05:45 +0000 Subject: [PATCH 2/8] Update docs --- notebook/agentchat_RetrieveChat.ipynb | 4 +++- website/blog/2023-10-18-RetrieveChat/index.mdx | 3 ++- website/docs/installation/Optional-Dependencies.md | 1 + website/docs/topics/retrieval_augmentation.md | 1 + 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/notebook/agentchat_RetrieveChat.ipynb b/notebook/agentchat_RetrieveChat.ipynb index eee192c4f826..6c41681a0aee 100644 --- a/notebook/agentchat_RetrieveChat.ipynb +++ b/notebook/agentchat_RetrieveChat.ipynb @@ -31,6 +31,8 @@ "pip install pyautogen[retrievechat] flaml[automl]\n", "```\n", "\n", + "> _You'll need to install chromadb<=0.5.0 if you see issue like [#3551](https://github.com/microsoft/autogen/issues/3551)_\n", + "\n", "For more information, please 
refer to the [installation guide](/docs/installation/).\n", ":::\n", "````" @@ -2785,7 +2787,7 @@ ] }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "autogen312", "language": "python", "name": "python3" }, diff --git a/website/blog/2023-10-18-RetrieveChat/index.mdx b/website/blog/2023-10-18-RetrieveChat/index.mdx index 91b8b5012a3b..d5a205505a7a 100644 --- a/website/blog/2023-10-18-RetrieveChat/index.mdx +++ b/website/blog/2023-10-18-RetrieveChat/index.mdx @@ -4,7 +4,7 @@ authors: thinkall tags: [LLM, RAG] --- -*Last update: August 14, 2024; AutoGen version: v0.2.35* +*Last update: September 23, 2024; AutoGen version: v0.2.35* ![RAG Architecture](img/retrievechat-arch.png) @@ -56,6 +56,7 @@ Please install pyautogen with the [retrievechat] option before using RAG agents. ```bash pip install "pyautogen[retrievechat]" ``` +> _You'll need to install chromadb<=0.5.0 if you see issue like [#3551](https://github.com/microsoft/autogen/issues/3551)_ RetrieveChat can handle various types of documents. By default, it can process plain text and PDF files, including formats such as 'txt', 'json', 'csv', 'tsv', diff --git a/website/docs/installation/Optional-Dependencies.md b/website/docs/installation/Optional-Dependencies.md index 2d0067c9950e..33cedba35c6f 100644 --- a/website/docs/installation/Optional-Dependencies.md +++ b/website/docs/installation/Optional-Dependencies.md @@ -49,6 +49,7 @@ Example notebooks: ```bash pip install "pyautogen[retrievechat]" ``` +> _You'll need to install chromadb<=0.5.0 if you see issue like [#3551](https://github.com/microsoft/autogen/issues/3551)_ Alternatively `pyautogen` also supports PGVector and Qdrant which can be installed in place of ChromaDB, or alongside it. 
diff --git a/website/docs/topics/retrieval_augmentation.md b/website/docs/topics/retrieval_augmentation.md index 3c428f164868..8bf9427eb2d9 100644 --- a/website/docs/topics/retrieval_augmentation.md +++ b/website/docs/topics/retrieval_augmentation.md @@ -56,6 +56,7 @@ ragproxyagent.initiate_chat( assistant, message=ragproxyagent.message_generator, problem=code_problem, search_string="spark" ) # search_string is used as an extra filter for the embeddings search, in this case, we only want to search documents that contain "spark". ``` +> _You'll need to install chromadb<=0.5.0 if you see issue like [#3551](https://github.com/microsoft/autogen/issues/3551)_ ## Example Setup: RAG with Retrieval Augmented Agents with PGVector The following is an example setup demonstrating how to create retrieval augmented agents in AutoGen: From 1944c82ae61579e653b320e6932f756010426401 Mon Sep 17 00:00:00 2001 From: Li Jiang Date: Mon, 23 Sep 2024 10:18:35 +0000 Subject: [PATCH 3/8] Improve robustness --- autogen/agentchat/contrib/vectordb/chromadb.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/autogen/agentchat/contrib/vectordb/chromadb.py b/autogen/agentchat/contrib/vectordb/chromadb.py index f02c6379fc06..bef4a1090219 100644 --- a/autogen/agentchat/contrib/vectordb/chromadb.py +++ b/autogen/agentchat/contrib/vectordb/chromadb.py @@ -1,8 +1,6 @@ import os from typing import Callable, List -import chromadb.errors - from .base import Document, ItemID, QueryResults, VectorDB from .utils import chroma_results_to_query_results, filter_results_by_distance, get_logger @@ -16,6 +14,11 @@ except ImportError: raise ImportError("Please install chromadb: `pip install chromadb`") +try: + from chromadb.errors import ChromaError +except ImportError: + ChromaError = Exception + CHROMADB_MAX_BATCH_SIZE = os.environ.get("CHROMADB_MAX_BATCH_SIZE", 40000) logger = get_logger(__name__) @@ -86,7 +89,7 @@ def create_collection( collection = self.active_collection else: 
collection = self.client.get_collection(collection_name, embedding_function=self.embedding_function) - except (ValueError, chromadb.errors.ChromaError): + except (ValueError, ChromaError): collection = None if collection is None: return self.client.create_collection( From 09d7b7c00202c0adf4c814a43b4032641fa3bc6b Mon Sep 17 00:00:00 2001 From: Li Jiang Date: Mon, 23 Sep 2024 10:19:21 +0000 Subject: [PATCH 4/8] Fix tests --- test/agentchat/contrib/vectordb/test_chromadb.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/test/agentchat/contrib/vectordb/test_chromadb.py b/test/agentchat/contrib/vectordb/test_chromadb.py index ee4886f5154d..19b365db10ba 100644 --- a/test/agentchat/contrib/vectordb/test_chromadb.py +++ b/test/agentchat/contrib/vectordb/test_chromadb.py @@ -15,6 +15,11 @@ else: skip = False +try: + from chromadb.errors import ChromaError +except ImportError: + ChromaError = Exception + @pytest.mark.skipif(skip, reason="dependency is not installed") def test_chromadb(): @@ -26,12 +31,14 @@ def test_chromadb(): # test_delete_collection db.delete_collection(collection_name) - pytest.raises(ValueError, db.get_collection, collection_name) + pytest.raises((ValueError, ChromaError), db.get_collection, collection_name) # test more create collection collection = db.create_collection(collection_name, overwrite=False, get_or_create=False) assert collection.name == collection_name - pytest.raises(ValueError, db.create_collection, collection_name, overwrite=False, get_or_create=False) + pytest.raises( + (ValueError, ChromaError), db.create_collection, collection_name, overwrite=False, get_or_create=False + ) collection = db.create_collection(collection_name, overwrite=True, get_or_create=False) assert collection.name == collection_name collection = db.create_collection(collection_name, overwrite=False, get_or_create=True) From c0f5782efef9a6365cddb0ff8ea3710df9429ac0 Mon Sep 17 00:00:00 2001 From: Li Jiang Date: Mon, 23 Sep 2024 10:25:38 
+0000 Subject: [PATCH 5/8] Fix docs --- website/blog/2023-10-18-RetrieveChat/index.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/blog/2023-10-18-RetrieveChat/index.mdx b/website/blog/2023-10-18-RetrieveChat/index.mdx index d5a205505a7a..331afcd8ce68 100644 --- a/website/blog/2023-10-18-RetrieveChat/index.mdx +++ b/website/blog/2023-10-18-RetrieveChat/index.mdx @@ -56,7 +56,7 @@ Please install pyautogen with the [retrievechat] option before using RAG agents. ```bash pip install "pyautogen[retrievechat]" ``` -> _You'll need to install chromadb<=0.5.0 if you see issue like [#3551](https://github.com/microsoft/autogen/issues/3551)_ +> You'll need to install chromadb<=0.5.0 if you see issue like [#3551](https://github.com/microsoft/autogen/issues/3551). RetrieveChat can handle various types of documents. By default, it can process plain text and PDF files, including formats such as 'txt', 'json', 'csv', 'tsv', From 69d60ecf1e28752eab9176bfdd675d1703c13cdd Mon Sep 17 00:00:00 2001 From: Li Jiang Date: Mon, 23 Sep 2024 10:30:42 +0000 Subject: [PATCH 6/8] Fix docs --- website/blog/2023-10-18-RetrieveChat/index.mdx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/website/blog/2023-10-18-RetrieveChat/index.mdx b/website/blog/2023-10-18-RetrieveChat/index.mdx index 331afcd8ce68..6093f699ab22 100644 --- a/website/blog/2023-10-18-RetrieveChat/index.mdx +++ b/website/blog/2023-10-18-RetrieveChat/index.mdx @@ -56,7 +56,8 @@ Please install pyautogen with the [retrievechat] option before using RAG agents. ```bash pip install "pyautogen[retrievechat]" ``` -> You'll need to install chromadb<=0.5.0 if you see issue like [#3551](https://github.com/microsoft/autogen/issues/3551). + +*You'll need to install chromadb<=0.5.0 if you see issue like [#3551](https://github.com/microsoft/autogen/issues/3551).* RetrieveChat can handle various types of documents. 
By default, it can process plain text and PDF files, including formats such as 'txt', 'json', 'csv', 'tsv', From fc65acf30d8819cb2c5f1656daed5ef62de837b8 Mon Sep 17 00:00:00 2001 From: Li Jiang Date: Mon, 23 Sep 2024 10:39:46 +0000 Subject: [PATCH 7/8] Fix docs --- notebook/agentchat_RetrieveChat.ipynb | 2 +- website/docs/installation/Optional-Dependencies.md | 2 +- website/docs/topics/retrieval_augmentation.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/notebook/agentchat_RetrieveChat.ipynb b/notebook/agentchat_RetrieveChat.ipynb index 6c41681a0aee..cfbdb8971924 100644 --- a/notebook/agentchat_RetrieveChat.ipynb +++ b/notebook/agentchat_RetrieveChat.ipynb @@ -31,7 +31,7 @@ "pip install pyautogen[retrievechat] flaml[automl]\n", "```\n", "\n", - "> _You'll need to install chromadb<=0.5.0 if you see issue like [#3551](https://github.com/microsoft/autogen/issues/3551)_\n", + "> You'll need to install chromadb<=0.5.0 if you see issue like [#3551](https://github.com/microsoft/autogen/issues/3551).\n", "\n", "For more information, please refer to the [installation guide](/docs/installation/).\n", ":::\n", diff --git a/website/docs/installation/Optional-Dependencies.md b/website/docs/installation/Optional-Dependencies.md index 33cedba35c6f..ddb40d0c423b 100644 --- a/website/docs/installation/Optional-Dependencies.md +++ b/website/docs/installation/Optional-Dependencies.md @@ -49,7 +49,7 @@ Example notebooks: ```bash pip install "pyautogen[retrievechat]" ``` -> _You'll need to install chromadb<=0.5.0 if you see issue like [#3551](https://github.com/microsoft/autogen/issues/3551)_ +> You'll need to install chromadb<=0.5.0 if you see issue like [#3551](https://github.com/microsoft/autogen/issues/3551). Alternatively `pyautogen` also supports PGVector and Qdrant which can be installed in place of ChromaDB, or alongside it. 
diff --git a/website/docs/topics/retrieval_augmentation.md b/website/docs/topics/retrieval_augmentation.md index 8bf9427eb2d9..a921db0da9ba 100644 --- a/website/docs/topics/retrieval_augmentation.md +++ b/website/docs/topics/retrieval_augmentation.md @@ -56,7 +56,7 @@ ragproxyagent.initiate_chat( assistant, message=ragproxyagent.message_generator, problem=code_problem, search_string="spark" ) # search_string is used as an extra filter for the embeddings search, in this case, we only want to search documents that contain "spark". ``` -> _You'll need to install chromadb<=0.5.0 if you see issue like [#3551](https://github.com/microsoft/autogen/issues/3551)_ +> You'll need to install chromadb<=0.5.0 if you see issue like [#3551](https://github.com/microsoft/autogen/issues/3551). ## Example Setup: RAG with Retrieval Augmented Agents with PGVector The following is an example setup demonstrating how to create retrieval augmented agents in AutoGen: From 95c58267e75bc10de134f9c3be62305a0595a54a Mon Sep 17 00:00:00 2001 From: Li Jiang Date: Mon, 23 Sep 2024 19:43:28 +0800 Subject: [PATCH 8/8] Fix docs --- notebook/agentchat_RetrieveChat.ipynb | 2 +- website/blog/2023-10-18-RetrieveChat/index.mdx | 2 +- website/docs/installation/Optional-Dependencies.md | 2 +- website/docs/topics/retrieval_augmentation.md | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/notebook/agentchat_RetrieveChat.ipynb b/notebook/agentchat_RetrieveChat.ipynb index cfbdb8971924..0b829835a0a3 100644 --- a/notebook/agentchat_RetrieveChat.ipynb +++ b/notebook/agentchat_RetrieveChat.ipynb @@ -31,7 +31,7 @@ "pip install pyautogen[retrievechat] flaml[automl]\n", "```\n", "\n", - "> You'll need to install chromadb<=0.5.0 if you see issue like [#3551](https://github.com/microsoft/autogen/issues/3551).\n", + "*You'll need to install `chromadb<=0.5.0` if you see an issue like [#3551](https://github.com/microsoft/autogen/issues/3551).*\n", "\n", "For more information, please refer to the 
[installation guide](/docs/installation/).\n", ":::\n", diff --git a/website/blog/2023-10-18-RetrieveChat/index.mdx b/website/blog/2023-10-18-RetrieveChat/index.mdx index 6093f699ab22..d3ad6aff3287 100644 --- a/website/blog/2023-10-18-RetrieveChat/index.mdx +++ b/website/blog/2023-10-18-RetrieveChat/index.mdx @@ -57,7 +57,7 @@ Please install pyautogen with the [retrievechat] option before using RAG agents. pip install "pyautogen[retrievechat]" ``` -*You'll need to install chromadb<=0.5.0 if you see issue like [#3551](https://github.com/microsoft/autogen/issues/3551).* +*You'll need to install `chromadb<=0.5.0` if you see an issue like [#3551](https://github.com/microsoft/autogen/issues/3551).* RetrieveChat can handle various types of documents. By default, it can process plain text and PDF files, including formats such as 'txt', 'json', 'csv', 'tsv', diff --git a/website/docs/installation/Optional-Dependencies.md b/website/docs/installation/Optional-Dependencies.md index ddb40d0c423b..820b8f18827f 100644 --- a/website/docs/installation/Optional-Dependencies.md +++ b/website/docs/installation/Optional-Dependencies.md @@ -49,7 +49,7 @@ Example notebooks: ```bash pip install "pyautogen[retrievechat]" ``` -> You'll need to install chromadb<=0.5.0 if you see issue like [#3551](https://github.com/microsoft/autogen/issues/3551). +*You'll need to install `chromadb<=0.5.0` if you see an issue like [#3551](https://github.com/microsoft/autogen/issues/3551).* Alternatively `pyautogen` also supports PGVector and Qdrant which can be installed in place of ChromaDB, or alongside it. 
diff --git a/website/docs/topics/retrieval_augmentation.md b/website/docs/topics/retrieval_augmentation.md index a921db0da9ba..7fdd39f31f92 100644 --- a/website/docs/topics/retrieval_augmentation.md +++ b/website/docs/topics/retrieval_augmentation.md @@ -56,7 +56,7 @@ ragproxyagent.initiate_chat( assistant, message=ragproxyagent.message_generator, problem=code_problem, search_string="spark" ) # search_string is used as an extra filter for the embeddings search, in this case, we only want to search documents that contain "spark". ``` -> You'll need to install chromadb<=0.5.0 if you see issue like [#3551](https://github.com/microsoft/autogen/issues/3551). +*You'll need to install `chromadb<=0.5.0` if you see an issue like [#3551](https://github.com/microsoft/autogen/issues/3551).* ## Example Setup: RAG with Retrieval Augmented Agents with PGVector The following is an example setup demonstrating how to create retrieval augmented agents in AutoGen: