Merge branch 'main' into selectpromptoptional

microsoft · May 22, 2024 · b4f7f31 · b4f7f31
2 parents f9d81ec + fb74624
commit b4f7f31
Show file tree

Hide file tree

Showing 62 changed files with 741 additions and 601 deletions.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -88,7 +88,9 @@ jobs:
           fi
       - name: Test with pytest skipping openai tests
         if: matrix.python-version != '3.10' && matrix.os == 'ubuntu-latest'
+        # Remove the "requests<2.32.0" pin in the run step below once https://github.com/docker/docker-py/issues/3256 is resolved
         run: |
+          pip install "requests<2.32.0"
           pytest test --ignore=test/agentchat/contrib --skip-openai --durations=10 --durations-min=1.0
       - name: Test with pytest skipping openai and docker tests
         if: matrix.python-version != '3.10' && matrix.os != 'ubuntu-latest'

diff --git a/README.md b/README.md
@@ -7,6 +7,7 @@
 [![Discord](https://img.shields.io/discord/1153072414184452236?logo=discord&style=flat)](https://aka.ms/autogen-dc)
 [![Twitter](https://img.shields.io/twitter/url/https/twitter.com/cloudposse.svg?style=social&label=Follow%20%40pyautogen)](https://twitter.com/pyautogen)
 
+[![NuGet version](https://badge.fury.io/nu/AutoGen.Core.svg)](https://badge.fury.io/nu/AutoGen.Core)
 
 # AutoGen
 [📚 Cite paper](#related-papers).

diff --git a/autogen/agentchat/chat.py b/autogen/agentchat/chat.py
@@ -195,7 +195,9 @@ def initiate_chats(chat_queue: List[Dict[str, Any]]) -> List[ChatResult]:
             r.summary for i, r in enumerate(finished_chats) if i not in finished_chat_indexes_to_exclude_from_carryover
         ]
 
-        __post_carryover_processing(chat_info)
+        if not chat_info.get("silent", False):
+            __post_carryover_processing(chat_info)
+
         sender = chat_info["sender"]
         chat_res = sender.initiate_chat(**chat_info)
         finished_chats.append(chat_res)
@@ -236,7 +238,10 @@ async def _dependent_chat_future(
     if isinstance(_chat_carryover, str):
         _chat_carryover = [_chat_carryover]
     chat_info["carryover"] = _chat_carryover + [finished_chats[pre_id].summary for pre_id in finished_chats]
-    __post_carryover_processing(chat_info)
+
+    if not chat_info.get("silent", False):
+        __post_carryover_processing(chat_info)
+
     sender = chat_info["sender"]
     chat_res_future = asyncio.create_task(sender.a_initiate_chat(**chat_info))
     call_back_with_args = partial(_on_chat_future_done, chat_id=chat_id)

diff --git a/autogen/agentchat/contrib/gpt_assistant_agent.py b/autogen/agentchat/contrib/gpt_assistant_agent.py
@@ -172,7 +172,7 @@ def __init__(
         # lazily create threads
         self._openai_threads = {}
         self._unread_index = defaultdict(int)
-        self.register_reply(Agent, GPTAssistantAgent._invoke_assistant, position=2)
+        self.register_reply([Agent, None], GPTAssistantAgent._invoke_assistant, position=2)
 
     def _invoke_assistant(
         self,

diff --git a/autogen/agentchat/groupchat.py b/autogen/agentchat/groupchat.py
@@ -947,6 +947,7 @@ def __init__(
         max_consecutive_auto_reply: Optional[int] = sys.maxsize,
         human_input_mode: Optional[str] = "NEVER",
         system_message: Optional[Union[str, List]] = "Group chat manager.",
+        silent: bool = False,
         **kwargs,
     ):
         if (
@@ -970,6 +971,8 @@ def __init__(
         # Store groupchat
         self._groupchat = groupchat
 
+        self._silent = silent
+
         # Order of register_reply is important.
         # Allow sync chat if initiated using initiate_chat
         self.register_reply(Agent, GroupChatManager.run_chat, config=groupchat, reset_config=GroupChat.reset)
@@ -1022,6 +1025,7 @@ def run_chat(
         speaker = sender
         groupchat = config
         send_introductions = getattr(groupchat, "send_introductions", False)
+        silent = getattr(self, "_silent", False)
 
         if send_introductions:
             # Broadcast the intro
@@ -1076,7 +1080,7 @@ def run_chat(
                 reply["content"] = self.clear_agents_history(reply, groupchat)
 
             # The speaker sends the message without requesting a reply
-            speaker.send(reply, self, request_reply=False)
+            speaker.send(reply, self, request_reply=False, silent=silent)
             message = self.last_message(speaker)
         if self.client_cache is not None:
             for a in groupchat.agents:
@@ -1097,6 +1101,7 @@ async def a_run_chat(
         speaker = sender
         groupchat = config
         send_introductions = getattr(groupchat, "send_introductions", False)
+        silent = getattr(self, "_silent", False)
 
         if send_introductions:
             # Broadcast the intro
@@ -1141,7 +1146,7 @@ async def a_run_chat(
             if reply is None:
                 break
             # The speaker sends the message without requesting a reply
-            await speaker.a_send(reply, self, request_reply=False)
+            await speaker.a_send(reply, self, request_reply=False, silent=silent)
             message = self.last_message(speaker)
         if self.client_cache is not None:
             for a in groupchat.agents:

diff --git a/autogen/logger/file_logger.py b/autogen/logger/file_logger.py
@@ -17,6 +17,7 @@
 
 if TYPE_CHECKING:
     from autogen import Agent, ConversableAgent, OpenAIWrapper
+    from autogen.oai.gemini import GeminiClient
 
 logger = logging.getLogger(__name__)
 
@@ -177,7 +178,9 @@ def log_new_wrapper(
         except Exception as e:
             self.logger.error(f"[file_logger] Failed to log event {e}")
 
-    def log_new_client(self, client: AzureOpenAI | OpenAI, wrapper: OpenAIWrapper, init_args: Dict[str, Any]) -> None:
+    def log_new_client(
+        self, client: AzureOpenAI | OpenAI | GeminiClient, wrapper: OpenAIWrapper, init_args: Dict[str, Any]
+    ) -> None:
         """
         Log a new client instance.
         """

diff --git a/autogen/logger/sqlite_logger.py b/autogen/logger/sqlite_logger.py
@@ -18,6 +18,7 @@
 
 if TYPE_CHECKING:
     from autogen import Agent, ConversableAgent, OpenAIWrapper
+    from autogen.oai.gemini import GeminiClient
 
 logger = logging.getLogger(__name__)
 lock = threading.Lock()
@@ -316,7 +317,7 @@ def log_new_wrapper(self, wrapper: OpenAIWrapper, init_args: Dict[str, Union[LLM
         self._run_query(query=query, args=args)
 
     def log_new_client(
-        self, client: Union[AzureOpenAI, OpenAI], wrapper: OpenAIWrapper, init_args: Dict[str, Any]
+        self, client: Union[AzureOpenAI, OpenAI, GeminiClient], wrapper: OpenAIWrapper, init_args: Dict[str, Any]
     ) -> None:
         if self.con is None:
             return

diff --git a/autogen/oai/client.py b/autogen/oai/client.py
@@ -435,7 +435,8 @@ def _register_default_client(self, config: Dict[str, Any], openai_config: Dict[s
             elif api_type is not None and api_type.startswith("google"):
                 if gemini_import_exception:
                     raise ImportError("Please install `google-generativeai` to use Google OpenAI API.")
-                self._clients.append(GeminiClient(**openai_config))
+                client = GeminiClient(**openai_config)
+                self._clients.append(client)
             else:
                 client = OpenAI(**openai_config)
                 self._clients.append(OpenAIClient(client))

diff --git a/autogen/runtime_logging.py b/autogen/runtime_logging.py
@@ -13,6 +13,7 @@
 
 if TYPE_CHECKING:
     from autogen import Agent, ConversableAgent, OpenAIWrapper
+    from autogen.oai.gemini import GeminiClient
 
 logger = logging.getLogger(__name__)
 
@@ -94,7 +95,9 @@ def log_new_wrapper(wrapper: OpenAIWrapper, init_args: Dict[str, Union[LLMConfig
     autogen_logger.log_new_wrapper(wrapper, init_args)
 
 
-def log_new_client(client: Union[AzureOpenAI, OpenAI], wrapper: OpenAIWrapper, init_args: Dict[str, Any]) -> None:
+def log_new_client(
+    client: Union[AzureOpenAI, OpenAI, GeminiClient], wrapper: OpenAIWrapper, init_args: Dict[str, Any]
+) -> None:
     if autogen_logger is None:
         logger.error("[runtime logging] log_new_client: autogen logger is None")
         return

diff --git a/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/OpenAICodeSnippet.cs b/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/OpenAICodeSnippet.cs
@@ -84,7 +84,7 @@ public async Task CreateOpenAIChatAgentAsync()
                     new TextMessage(Role.Assistant, "Hello", from: "user"),
                 ],
                 from: "user"),
-            new Message(Role.Assistant, "Hello", from: "user"), // Message type is going to be deprecated, please use TextMessage instead
+            new TextMessage(Role.Assistant, "Hello", from: "user"), // use TextMessage here; the Message type is going to be deprecated
         };
 
         foreach (var message in messages)

diff --git a/dotnet/sample/AutoGen.BasicSamples/Example03_Agent_FunctionCall.cs b/dotnet/sample/AutoGen.BasicSamples/Example03_Agent_FunctionCall.cs
@@ -77,20 +77,30 @@ public static async Task RunAsync()
         // talk to the assistant agent
         var upperCase = await agent.SendAsync("convert to upper case: hello world");
         upperCase.GetContent()?.Should().Be("HELLO WORLD");
-        upperCase.Should().BeOfType<AggregateMessage<ToolCallMessage, ToolCallResultMessage>>();
+        upperCase.Should().BeOfType<ToolCallAggregateMessage>();
         upperCase.GetToolCalls().Should().HaveCount(1);
         upperCase.GetToolCalls().First().FunctionName.Should().Be(nameof(UpperCase));
 
         var concatString = await agent.SendAsync("concatenate strings: a, b, c, d, e");
         concatString.GetContent()?.Should().Be("a b c d e");
-        concatString.Should().BeOfType<AggregateMessage<ToolCallMessage, ToolCallResultMessage>>();
+        concatString.Should().BeOfType<ToolCallAggregateMessage>();
         concatString.GetToolCalls().Should().HaveCount(1);
         concatString.GetToolCalls().First().FunctionName.Should().Be(nameof(ConcatString));
 
         var calculateTax = await agent.SendAsync("calculate tax: 100, 0.1");
         calculateTax.GetContent().Should().Be("tax is 10");
-        calculateTax.Should().BeOfType<AggregateMessage<ToolCallMessage, ToolCallResultMessage>>();
+        calculateTax.Should().BeOfType<ToolCallAggregateMessage>();
         calculateTax.GetToolCalls().Should().HaveCount(1);
         calculateTax.GetToolCalls().First().FunctionName.Should().Be(nameof(CalculateTax));
+
+        // parallel function calls
+        var calculateTaxes = await agent.SendAsync("calculate tax: 100, 0.1; calculate tax: 200, 0.2");
+        calculateTaxes.GetContent().Should().Be("tax is 10\ntax is 40"); // expected content: both tool-call results separated by a newline
+        calculateTaxes.Should().BeOfType<ToolCallAggregateMessage>();
+        calculateTaxes.GetToolCalls().Should().HaveCount(2);
+        calculateTaxes.GetToolCalls().First().FunctionName.Should().Be(nameof(CalculateTax));
+
+        // send aggregate message back to llm to get the final result
+        var finalResult = await agent.SendAsync(calculateTaxes);
     }
 }
diff --git a/dotnet/sample/AutoGen.BasicSamples/Example04_Dynamic_GroupChat_Coding_Task.cs b/dotnet/sample/AutoGen.BasicSamples/Example04_Dynamic_GroupChat_Coding_Task.cs
@@ -40,7 +40,8 @@ public static async Task RunAsync()
             name: "groupAdmin",
             systemMessage: "You are the admin of the group chat",
             temperature: 0f,
-            config: gptConfig);
+            config: gptConfig)
+            .RegisterPrintMessage();
 
         var userProxy = new UserProxyAgent(name: "user", defaultReply: GroupChatExtension.TERMINATE, humanInputMode: HumanInputMode.NEVER)
             .RegisterPrintMessage();

diff --git a/dotnet/sample/AutoGen.BasicSamples/Example05_Dalle_And_GPT4V.cs b/dotnet/sample/AutoGen.BasicSamples/Example05_Dalle_And_GPT4V.cs
@@ -1,8 +1,9 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Example05_Dalle_And_GPT4V.cs
 
-using AutoGen;
 using AutoGen.Core;
+using AutoGen.OpenAI;
+using AutoGen.OpenAI.Extension;
 using Azure.AI.OpenAI;
 using FluentAssertions;
 using autogen = AutoGen.LLMConfigAPI;
@@ -66,50 +67,39 @@ public static async Task RunAsync()
             File.Delete(imagePath);
         }
 
-        var dalleAgent = new AssistantAgent(
-            name: "dalle",
-            systemMessage: "You are a DALL-E agent that generate image from prompt, when conversation is terminated, return the most recent image url",
-            llmConfig: new ConversableAgentConfig
-            {
-                Temperature = 0,
-                ConfigList = gpt35Config,
-                FunctionContracts = new[]
-                {
-                    instance.GenerateImageFunctionContract,
-                },
-            },
+        var generateImageFunctionMiddleware = new FunctionCallMiddleware(
+            functions: [instance.GenerateImageFunctionContract],
             functionMap: new Dictionary<string, Func<string, Task<string>>>
             {
                 { nameof(GenerateImage), instance.GenerateImageWrapper },
-            })
+            });
+        var dalleAgent = new OpenAIChatAgent(
+            openAIClient: openAIClient,
+            modelName: "gpt-3.5-turbo",
+            name: "dalle",
+            systemMessage: "You are a DALL-E agent that generate image from prompt, when conversation is terminated, return the most recent image url")
+            .RegisterMessageConnector()
+            .RegisterStreamingMiddleware(generateImageFunctionMiddleware)
             .RegisterMiddleware(async (msgs, option, agent, ct) =>
             {
-                // if last message contains [TERMINATE], then find the last image url and terminate the conversation
-                if (msgs.Last().GetContent()?.Contains("TERMINATE") is true)
+                if (msgs.Any(msg => msg.GetContent()?.ToLower().Contains("approve") is true))
                 {
-                    var lastMessageWithImage = msgs.Last(msg => msg is ImageMessage) as ImageMessage;
-                    var lastImageUrl = lastMessageWithImage.Url;
-                    Console.WriteLine($"download image from {lastImageUrl} to {imagePath}");
-                    var httpClient = new HttpClient();
-                    var imageBytes = await httpClient.GetByteArrayAsync(lastImageUrl);
-                    File.WriteAllBytes(imagePath, imageBytes);
-
-                    var messageContent = $@"{GroupChatExtension.TERMINATE}
-
-{lastImageUrl}";
-                    return new TextMessage(Role.Assistant, messageContent)
-                    {
-                        From = "dalle",
-                    };
+                    return new TextMessage(Role.Assistant, $"The image satisfies the condition, conversation is terminated. {GroupChatExtension.TERMINATE}");
                 }
 
-                var reply = await agent.GenerateReplyAsync(msgs, option, ct);
+                var msgsWithoutImage = msgs.Where(msg => msg is not ImageMessage).ToList();
+                var reply = await agent.GenerateReplyAsync(msgsWithoutImage, option, ct);
 
                 if (reply.GetContent() is string content && content.Contains("IMAGE_GENERATION"))
                 {
                     var imageUrl = content.Split("\n").Last();
                     var imageMessage = new ImageMessage(Role.Assistant, imageUrl, from: reply.From);
 
+                    Console.WriteLine($"download image from {imageUrl} to {imagePath}");
+                    var httpClient = new HttpClient();
+                    var imageBytes = await httpClient.GetByteArrayAsync(imageUrl, ct);
+                    File.WriteAllBytes(imagePath, imageBytes);
+
                     return imageMessage;
                 }
                 else
@@ -119,33 +109,25 @@ public static async Task RunAsync()
             })
             .RegisterPrintMessage();
 
-        var gpt4VAgent = new AssistantAgent(
+        var gpt4VAgent = new OpenAIChatAgent(
+            openAIClient: openAIClient,
             name: "gpt4v",
+            modelName: "gpt-4-vision-preview",
             systemMessage: @"You are a critism that provide feedback to DALL-E agent.
 Carefully check the image generated by DALL-E agent and provide feedback.
-If the image satisfies the condition, then terminate the conversation by saying [TERMINATE].
+If the image satisfies the condition, then say [APPROVE].
 Otherwise, provide detailed feedback to DALL-E agent so it can generate better image.
 
 The image should satisfy the following conditions:
 - There should be a cat and a mouse in the image
-- The cat should be chasing after the mouse
-",
-            llmConfig: new ConversableAgentConfig
-            {
-                Temperature = 0,
-                ConfigList = gpt4vConfig,
-            })
+- The cat should be chasing after the mouse")
+            .RegisterMessageConnector()
             .RegisterPrintMessage();
 
-        IEnumerable<IMessage> conversation = new List<IMessage>()
-        {
-            new TextMessage(Role.User, "Hey dalle, please generate image from prompt: English short hair blue cat chase after a mouse")
-        };
-        var maxRound = 20;
         await gpt4VAgent.InitiateChatAsync(
             receiver: dalleAgent,
             message: "Hey dalle, please generate image from prompt: English short hair blue cat chase after a mouse",
-            maxRound: maxRound);
+            maxRound: 10);
 
         File.Exists(imagePath).Should().BeTrue();
     }