diff --git a/.github/workflows/openai.yml b/.github/workflows/openai.yml index 339c36527ba6..5c334eec3b0d 100644 --- a/.github/workflows/openai.yml +++ b/.github/workflows/openai.yml @@ -57,9 +57,9 @@ jobs: run: | pip install -e .[teachable] - name: Install packages for RetrieveChat with QDrant when needed - if: matrix.python-version == '3.9' + if: matrix.python-version == '3.11' run: | - pip install qdrant_client[fastembed] + pip install -e .[retrievechat] qdrant_client[fastembed] - name: Coverage if: matrix.python-version == '3.9' env: @@ -80,6 +80,7 @@ jobs: OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }} run: | pip install nbconvert nbformat ipykernel + coverage run -a -m pytest test/agentchat/test_qdrant_retrievechat.py coverage run -a -m pytest test/test_with_openai.py coverage run -a -m pytest test/test_notebook.py coverage xml diff --git a/autogen/retrieve_utils.py b/autogen/retrieve_utils.py index b6edba7a1e40..bc4fdfb75976 100644 --- a/autogen/retrieve_utils.py +++ b/autogen/retrieve_utils.py @@ -1,9 +1,8 @@ -from typing import List, Union, Dict, Tuple, Callable +from typing import List, Union, Callable import os import requests from urllib.parse import urlparse import glob -import tiktoken import chromadb if chromadb.__version__ < "0.4.15": diff --git a/autogen/version.py b/autogen/version.py index 3cb7d95ef824..fb69db9cf4de 100644 --- a/autogen/version.py +++ b/autogen/version.py @@ -1 +1 @@ -__version__ = "0.1.13" +__version__ = "0.1.14" diff --git a/setup.py b/setup.py index 891eaba17884..d3308bf1ccc6 100644 --- a/setup.py +++ b/setup.py @@ -57,7 +57,7 @@ ], "blendsearch": ["flaml[blendsearch]"], "mathchat": ["sympy", "pydantic==1.10.9", "wolframalpha"], - "retrievechat": ["chromadb", "tiktoken", "sentence_transformers", "pypdf"], + "retrievechat": ["chromadb", "tiktoken", "sentence_transformers", "pypdf", "ipython"], "teachable": ["chromadb"], }, classifiers=[ diff --git a/test/agentchat/test_retrievechat.py b/test/agentchat/test_retrievechat.py index 99e395de5056..d71d146194ba 100644 --- a/test/agentchat/test_retrievechat.py +++ b/test/agentchat/test_retrievechat.py @@ -10,7 +10,6 @@ from autogen.agentchat.contrib.retrieve_user_proxy_agent import ( RetrieveUserProxyAgent, ) - from autogen.retrieve_utils import create_vector_db_from_dir, query_vector_db import chromadb from chromadb.utils import embedding_functions as ef @@ -61,6 +60,7 @@ def test_retrievechat(): "model": config_list[0]["model"], "client": chromadb.PersistentClient(path="/tmp/chromadb"), "embedding_function": sentence_transformer_ef, + "get_or_create": True, }, ) @@ -72,26 +72,5 @@ def test_retrievechat(): print(conversations) -@pytest.mark.skipif( - sys.platform in ["darwin", "win32"] or skip_test, - reason="do not run on MacOS or windows", -) -def test_retrieve_utils(): - client = chromadb.PersistentClient(path="/tmp/chromadb") - create_vector_db_from_dir(dir_path="./website/docs", client=client, collection_name="autogen-docs") - results = query_vector_db( - query_texts=[ - "How can I use AutoGen UserProxyAgent and AssistantAgent to do code generation?", - ], - n_results=4, - client=client, - collection_name="autogen-docs", - search_string="AutoGen", - ) - print(results["ids"][0]) - assert len(results["ids"][0]) == 4 - - if __name__ == "__main__": test_retrievechat() - test_retrieve_utils() diff --git a/test/oai/test_completion.py b/test/oai/test_completion.py index f33533b7bf88..b6cb5c31b1c2 100644 --- a/test/oai/test_completion.py +++ b/test/oai/test_completion.py @@ -231,7 +231,7 @@ def test_humaneval(num_samples=1): raise_on_ratelimit_or_timeout=False, ) # assert response == -1 - config_list = autogen.config_list_openai_aoai(KEY_LOC, exclude="aoai") + config_list = autogen.config_list_openai_aoai(KEY_LOC) # a minimal tuning example config, _ = autogen.Completion.tune( data=tune_data, @@ -376,11 +376,11 @@ def test_math(num_samples=-1): ] autogen.Completion.set_cache(seed) - config_list = autogen.config_list_openai_aoai(KEY_LOC)[:2] + config_list = autogen.config_list_openai_aoai(KEY_LOC) vanilla_config = { - "model": "text-davinci-003", + "model": "text-ada-001", "temperature": 1, - "max_tokens": 2048, + "max_tokens": 1024, "n": 1, "prompt": prompts[0], "stop": "###", @@ -451,5 +451,5 @@ def my_average(results): # test_chatcompletion() # test_multi_model() # test_nocontext() - test_humaneval(1) - # test_math(1) + # test_humaneval(1) + test_math(1) diff --git a/test/test_retrieve_utils.py b/test/test_retrieve_utils.py index db581b8798aa..0585b2a147d9 100644 --- a/test/test_retrieve_utils.py +++ b/test/test_retrieve_utils.py @@ -7,19 +7,15 @@ extract_text_from_pdf, split_files_to_chunks, get_files_from_dir, - get_file_from_url, is_url, create_vector_db_from_dir, query_vector_db, - TEXT_FORMATS, ) from autogen.token_count_utils import count_token import os -import sys import pytest import chromadb -import tiktoken test_dir = os.path.join(os.path.dirname(__file__), "test_files") @@ -157,6 +153,7 @@ def custom_text_split_function(text): client=client, collection_name="mytestcollection", custom_text_split_function=custom_text_split_function, + get_or_create=True, ) results = query_vector_db(["autogen"], client=client, collection_name="mytestcollection", n_results=1) assert ( @@ -164,6 +161,21 @@ def custom_text_split_function(text): == "AutoGen is an advanced tool designed to assist developers in harnessing the capabilities\nof Large Language Models (LLMs) for various applications. The primary purpose o" ) + def test_retrieve_utils(self): + client = chromadb.PersistentClient(path="/tmp/chromadb") + create_vector_db_from_dir(dir_path="./website/docs", client=client, collection_name="autogen-docs") + results = query_vector_db( + query_texts=[ + "How can I use AutoGen UserProxyAgent and AssistantAgent to do code generation?", + ], + n_results=4, + client=client, + collection_name="autogen-docs", + search_string="AutoGen", + ) + print(results["ids"][0]) + assert len(results["ids"][0]) == 4 + if __name__ == "__main__": pytest.main()