Merge branch 'main' into main

microsoft · Apr 23, 2024 · 672c545 · 672c545
2 parents edc5dc5 + a41182a
commit 672c545
Show file tree

Hide file tree

Showing 35 changed files with 5,867 additions and 1,769 deletions.
diff --git a/.github/workflows/contrib-openai.yml b/.github/workflows/contrib-openai.yml
@@ -41,19 +41,23 @@ jobs:
           pip install -e .
           python -c "import autogen"
           pip install coverage pytest-asyncio
+      - name: Install PostgreSQL
+        run: |
+          sudo apt install postgresql -y
+      - name: Start PostgreSQL service
+        run: sudo service postgresql start
       - name: Install packages for test when needed
         run: |
           pip install docker
-          pip install qdrant_client[fastembed]
-          pip install -e .[retrievechat]
+          pip install -e .[retrievechat-qdrant,retrievechat-pgvector]
       - name: Coverage
         env:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
           AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }}
           OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }}
         run: |
-          coverage run -a -m pytest test/agentchat/contrib/test_retrievechat.py::test_retrievechat test/agentchat/contrib/test_qdrant_retrievechat.py::test_retrievechat
+          coverage run -a -m pytest test/agentchat/contrib/test_retrievechat.py::test_retrievechat test/agentchat/contrib/test_qdrant_retrievechat.py::test_retrievechat test/agentchat/contrib/test_pgvector_retrievechat.py::test_retrievechat
           coverage xml
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v3

diff --git a/.github/workflows/contrib-tests.yml b/.github/workflows/contrib-tests.yml
@@ -42,16 +42,20 @@ jobs:
       - name: Install qdrant_client when python-version is 3.10
         if: matrix.python-version == '3.10'
         run: |
-          pip install qdrant_client[fastembed]
+          pip install .[retrievechat-qdrant]
       - name: Install unstructured when python-version is 3.9 and on linux
-        if: matrix.python-version == '3.9' && matrix.os == 'ubuntu-latest'
         run: |
           sudo apt-get update
           sudo apt-get install -y tesseract-ocr poppler-utils
           pip install unstructured[all-docs]==0.13.0
-      - name: Install packages and dependencies for RetrieveChat
+      - name: Install and Start PostgreSQL
+        runs-on: ubuntu-latest
         run: |
-          pip install -e .[retrievechat]
+          sudo apt install postgresql -y
+          sudo service postgresql start
+      - name: Install packages and dependencies for PGVector
+        run: |
+          pip install -e .[retrievechat-pgvector]
       - name: Set AUTOGEN_USE_DOCKER based on OS
         shell: bash
         run: |

diff --git a/.github/workflows/dotnet-build.yml b/.github/workflows/dotnet-build.yml
@@ -22,13 +22,15 @@ permissions:
 
 jobs:
   build:
-    name: Build
+    name: Dotnet Build
     runs-on: ubuntu-latest
     defaults:
       run:
         working-directory: dotnet
     steps:
     - uses: actions/checkout@v4
+      with:
+        lfs: true
     - name: Setup .NET
       uses: actions/setup-dotnet@v4
       with:
@@ -54,6 +56,8 @@ jobs:
     needs: build
     steps:
     - uses: actions/checkout@v4
+      with:
+        lfs: true
     - name: Setup .NET
       uses: actions/setup-dotnet@v4
       with:

diff --git a/.github/workflows/dotnet-release.yml b/.github/workflows/dotnet-release.yml
@@ -27,6 +27,8 @@ jobs:
         working-directory: dotnet
     steps:
     - uses: actions/checkout@v4
+      with:
+        lfs: true
     - name: Setup .NET
       uses: actions/setup-dotnet@v4
       with:
@@ -66,4 +68,4 @@ jobs:
         $version = $metaInfoContent | Select-String -Pattern "<VersionPrefix>(.*)</VersionPrefix>" | ForEach-Object { $_.Matches.Groups[1].Value }
         git tag -a "$version" -m "AutoGen.Net release $version"
         git push origin --tags
-      shell: pwsh
+      shell: pwsh
diff --git a/.github/workflows/lfs-check.yml b/.github/workflows/lfs-check.yml
@@ -0,0 +1,15 @@
+name: "Git LFS Check"
+
+on: pull_request
+permissions: {}
+jobs:
+  lfs-check:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          lfs: true
+      - name: Check Git LFS files for consistency
+        run: |
+          git lfs fsck
diff --git a/OAI_CONFIG_LIST_sample b/OAI_CONFIG_LIST_sample
@@ -5,7 +5,8 @@
 [
     {
         "model": "gpt-4",
-        "api_key": "<your OpenAI API key here>"
+        "api_key": "<your OpenAI API key here>",
+        "tags": ["gpt-4", "tool"]
     },
     {
         "model": "<your Azure OpenAI deployment name>",

diff --git a/README.md b/README.md
@@ -14,7 +14,7 @@
     <img src="https://github.com/microsoft/autogen/blob/main/website/static/img/flaml.svg"  width=200>
     <br>
 </p> -->
-:fire: Mar 26, 2024: Andrew Ng gave a shoutout to AutoGen in [What's next for AI agentic workflows](https://youtu.be/sal78ACtGTc?si=JduUzN_1kDnMq0vF) at Sequoia Capital's AI Ascent.
+:fire: Apr 17, 2024: Andrew Ng cited AutoGen in [The Batch newsletter](https://www.deeplearning.ai/the-batch/issue-245/) and [What's next for AI agentic workflows](https://youtu.be/sal78ACtGTc?si=JduUzN_1kDnMq0vF) at Sequoia Capital's AI Ascent (Mar 26).
 
 :fire: Mar 3, 2024: What's new in AutoGen? 📰[Blog](https://microsoft.github.io/autogen/blog/2024/03/03/AutoGen-Update); 📺[Youtube](https://www.youtube.com/watch?v=j_mtwQiaLGU).
 

diff --git a/autogen/agentchat/contrib/gpt_assistant_agent.py b/autogen/agentchat/contrib/gpt_assistant_agent.py
@@ -10,7 +10,7 @@
 from autogen import OpenAIWrapper
 from autogen.agentchat.agent import Agent
 from autogen.agentchat.assistant_agent import AssistantAgent, ConversableAgent
-from autogen.oai.openai_utils import retrieve_assistants_by_name
+from autogen.oai.openai_utils import create_gpt_assistant, retrieve_assistants_by_name, update_gpt_assistant
 
 logger = logging.getLogger(__name__)
 
@@ -50,7 +50,8 @@ def __init__(
                 - check_every_ms: check thread run status interval
                 - tools: Give Assistants access to OpenAI-hosted tools like Code Interpreter and Knowledge Retrieval,
                         or build your own tools using Function calling. ref https://platform.openai.com/docs/assistants/tools
-                - file_ids: files used by retrieval in run
+                - file_ids: (Deprecated) files used by retrieval in run. Use tool_resources instead; see https://platform.openai.com/docs/assistants/migration/what-has-changed.
+                - tool_resources: A set of resources that are used by the assistant's tools. The resources are specific to the type of tool.
             overwrite_instructions (bool): whether to overwrite the instructions of an existing assistant. This parameter is in effect only when assistant_id is specified in llm_config.
             overwrite_tools (bool): whether to overwrite the tools of an existing assistant. This parameter is in effect only when assistant_id is specified in llm_config.
             kwargs (dict): Additional configuration options for the agent.
@@ -90,7 +91,6 @@ def __init__(
                     candidate_assistants,
                     instructions,
                     openai_assistant_cfg.get("tools", []),
-                    openai_assistant_cfg.get("file_ids", []),
                 )
 
             if len(candidate_assistants) == 0:
@@ -101,12 +101,12 @@ def __init__(
                         "No instructions were provided for new assistant. Using default instructions from AssistantAgent.DEFAULT_SYSTEM_MESSAGE."
                     )
                     instructions = AssistantAgent.DEFAULT_SYSTEM_MESSAGE
-                self._openai_assistant = self._openai_client.beta.assistants.create(
+                self._openai_assistant = create_gpt_assistant(
+                    self._openai_client,
                     name=name,
                     instructions=instructions,
-                    tools=openai_assistant_cfg.get("tools", []),
                     model=model_name,
-                    file_ids=openai_assistant_cfg.get("file_ids", []),
+                    assistant_config=openai_assistant_cfg,
                 )
             else:
                 logger.warning(
@@ -127,9 +127,12 @@ def __init__(
                 logger.warning(
                     "overwrite_instructions is True. Provided instructions will be used and will modify the assistant in the API"
                 )
-                self._openai_assistant = self._openai_client.beta.assistants.update(
+                self._openai_assistant = update_gpt_assistant(
+                    self._openai_client,
                     assistant_id=openai_assistant_id,
-                    instructions=instructions,
+                    assistant_config={
+                        "instructions": instructions,
+                    },
                 )
             else:
                 logger.warning(
@@ -154,9 +157,13 @@ def __init__(
                 logger.warning(
                     "overwrite_tools is True. Provided tools will be used and will modify the assistant in the API"
                 )
-                self._openai_assistant = self._openai_client.beta.assistants.update(
+                self._openai_assistant = update_gpt_assistant(
+                    self._openai_client,
                     assistant_id=openai_assistant_id,
-                    tools=openai_assistant_cfg.get("tools", []),
+                    assistant_config={
+                        "tools": specified_tools,
+                        "tool_resources": openai_assistant_cfg.get("tool_resources", None),
+                    },
                 )
             else:
                 # Tools are specified but overwrite_tools is False; do not update the assistant's tools
@@ -198,6 +205,8 @@ def _invoke_assistant(
         assistant_thread = self._openai_threads[sender]
         # Process each unread message
         for message in pending_messages:
+            if message["content"].strip() == "":
+                continue
             self._openai_client.beta.threads.messages.create(
                 thread_id=assistant_thread.id,
                 content=message["content"],
@@ -426,22 +435,23 @@ def delete_assistant(self):
         logger.warning("Permanently deleting assistant...")
         self._openai_client.beta.assistants.delete(self.assistant_id)
 
-    def find_matching_assistant(self, candidate_assistants, instructions, tools, file_ids):
+    def find_matching_assistant(self, candidate_assistants, instructions, tools):
         """
         Find the matching assistant from a list of candidate assistants.
-        Filter out candidates with the same name but different instructions, file IDs, and function names.
-        TODO: implement accurate match based on assistant metadata fields.
+        Filter out candidates with the same name but different instructions, tool types, or function names.
         """
         matching_assistants = []
 
         # Preprocess the required tools for faster comparison
-        required_tool_types = set(tool.get("type") for tool in tools)
+        required_tool_types = set(
+            "file_search" if tool.get("type") in ["retrieval", "file_search"] else tool.get("type") for tool in tools
+        )
+
         required_function_names = set(
             tool.get("function", {}).get("name")
             for tool in tools
-            if tool.get("type") not in ["code_interpreter", "retrieval"]
+            if tool.get("type") not in ["code_interpreter", "retrieval", "file_search"]
         )
-        required_file_ids = set(file_ids)  # Convert file_ids to a set for unordered comparison
 
         for assistant in candidate_assistants:
             # Check if instructions are similar
@@ -454,11 +464,12 @@ def find_matching_assistant(self, candidate_assistants, instructions, tools, fil
                 continue
 
             # Preprocess the assistant's tools
-            assistant_tool_types = set(tool.type for tool in assistant.tools)
+            assistant_tool_types = set(
+                "file_search" if tool.type in ["retrieval", "file_search"] else tool.type for tool in assistant.tools
+            )
             assistant_function_names = set(tool.function.name for tool in assistant.tools if hasattr(tool, "function"))
-            assistant_file_ids = set(getattr(assistant, "file_ids", []))  # Convert to set for comparison
 
-            # Check if the tool types, function names, and file IDs match
+            # Check if the tool types and function names match
             if required_tool_types != assistant_tool_types or required_function_names != assistant_function_names:
                 logger.warning(
                     "tools not match, skip assistant(%s): tools %s, functions %s",
@@ -467,9 +478,6 @@ def find_matching_assistant(self, candidate_assistants, instructions, tools, fil
                     assistant_function_names,
                 )
                 continue
-            if required_file_ids != assistant_file_ids:
-                logger.warning("file_ids not match, skip assistant(%s): %s", assistant.id, assistant_file_ids)
-                continue
 
             # Append assistant to matching list if all conditions are met
             matching_assistants.append(assistant)
@@ -496,7 +504,7 @@ def _process_assistant_config(self, llm_config, assistant_config):
 
         # Move the assistant related configurations to assistant_config
         # It's important to keep forward compatibility
-        assistant_config_items = ["assistant_id", "tools", "file_ids", "check_every_ms"]
+        assistant_config_items = ["assistant_id", "tools", "file_ids", "tool_resources", "check_every_ms"]
         for item in assistant_config_items:
             if openai_client_cfg.get(item) is not None and openai_assistant_cfg.get(item) is None:
                 openai_assistant_cfg[item] = openai_client_cfg[item]

diff --git a/autogen/agentchat/contrib/vectordb/base.py b/autogen/agentchat/contrib/vectordb/base.py
@@ -185,7 +185,7 @@ class VectorDBFactory:
     Factory class for creating vector databases.
     """
 
-    PREDEFINED_VECTOR_DB = ["chroma"]
+    PREDEFINED_VECTOR_DB = ["chroma", "pgvector"]
 
     @staticmethod
     def create_vector_db(db_type: str, **kwargs) -> VectorDB:
@@ -203,6 +203,10 @@ def create_vector_db(db_type: str, **kwargs) -> VectorDB:
             from .chromadb import ChromaVectorDB
 
             return ChromaVectorDB(**kwargs)
+        if db_type.lower() in ["pgvector", "pgvectordb"]:
+            from .pgvectordb import PGVectorDB
+
+            return PGVectorDB(**kwargs)
         else:
             raise ValueError(
                 f"Unsupported vector database type: {db_type}. Valid types are {VectorDBFactory.PREDEFINED_VECTOR_DB}."