diff --git a/autogen/agentchat/contrib/capabilities/transform_messages.py b/autogen/agentchat/contrib/capabilities/transform_messages.py index e96dc39fa7bc..1ce219bdadfa 100644 --- a/autogen/agentchat/contrib/capabilities/transform_messages.py +++ b/autogen/agentchat/contrib/capabilities/transform_messages.py @@ -1,9 +1,8 @@ import copy from typing import Dict, List -from autogen import ConversableAgent - from ....formatting_utils import colored +from ...conversable_agent import ConversableAgent from .transforms import MessageTransform diff --git a/autogen/agentchat/groupchat.py b/autogen/agentchat/groupchat.py index 53fc32e58d21..232503ad5684 100644 --- a/autogen/agentchat/groupchat.py +++ b/autogen/agentchat/groupchat.py @@ -5,7 +5,7 @@ import re import sys from dataclasses import dataclass, field -from typing import Callable, Dict, List, Literal, Optional, Tuple, Union +from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union from ..code_utils import content_str from ..exception_utils import AgentNameConflict, NoEligibleSpeaker, UndefinedNextAgent @@ -17,6 +17,12 @@ from .chat import ChatResult from .conversable_agent import ConversableAgent +try: + # Non-core module + from .contrib.capabilities import transform_messages +except ImportError: + transform_messages = None + logger = logging.getLogger(__name__) @@ -76,6 +82,8 @@ def custom_speaker_selection_func( of times until a single agent is returned or it exhausts the maximum attempts. Applies only to "auto" speaker selection method. Default is 2. + - select_speaker_transform_messages: (optional) the message transformations to apply to the nested select speaker agent-to-agent chat messages. + Takes a TransformMessages object, defaults to None and is only utilised when the speaker selection method is "auto". - select_speaker_auto_verbose: whether to output the select speaker responses and selections If set to True, the outputs from the two agents in the nested select speaker chat will be output, along with whether the responses were successful, or not, in selecting an agent @@ -132,6 +140,7 @@ def custom_speaker_selection_func( The names are case-sensitive and should not be abbreviated or changed. The only names that are accepted are {agentlist}. Respond with ONLY the name of the speaker and DO NOT provide a reason.""" + select_speaker_transform_messages: Optional[Any] = None select_speaker_auto_verbose: Optional[bool] = False role_for_select_speaker_messages: Optional[str] = "system" @@ -249,6 +258,20 @@ def __post_init__(self): elif self.max_retries_for_selecting_speaker < 0: raise ValueError("max_retries_for_selecting_speaker must be greater than or equal to zero") + # Load message transforms here (load once for the Group Chat so we don't have to re-initiate it and it maintains the cache across subsequent select speaker calls) + self._speaker_selection_transforms = None + if self.select_speaker_transform_messages is not None: + if transform_messages is not None: + if isinstance(self.select_speaker_transform_messages, transform_messages.TransformMessages): + self._speaker_selection_transforms = self.select_speaker_transform_messages + else: + raise ValueError("select_speaker_transform_messages must be None or MessageTransforms.") + else: + logger.warning( + "TransformMessages could not be loaded, the 'select_speaker_transform_messages' transform" + "will not apply." + ) + # Validate select_speaker_auto_verbose if self.select_speaker_auto_verbose is None or not isinstance(self.select_speaker_auto_verbose, bool): raise ValueError("select_speaker_auto_verbose cannot be None or non-bool") @@ -655,6 +678,10 @@ def validate_speaker_name(recipient, messages, sender, config) -> Tuple[bool, Un else: start_message = messages[-1] + # Add the message transforms, if any, to the speaker selection agent + if self._speaker_selection_transforms is not None: + self._speaker_selection_transforms.add_to_agent(speaker_selection_agent) + # Run the speaker selection chat result = checking_agent.initiate_chat( speaker_selection_agent, @@ -749,6 +776,10 @@ def validate_speaker_name(recipient, messages, sender, config) -> Tuple[bool, Un else: start_message = messages[-1] + # Add the message transforms, if any, to the speaker selection agent + if self._speaker_selection_transforms is not None: + self._speaker_selection_transforms.add_to_agent(speaker_selection_agent) + # Run the speaker selection chat result = await checking_agent.a_initiate_chat( speaker_selection_agent, diff --git a/test/agentchat/test_groupchat.py b/test/agentchat/test_groupchat.py index 291a94d450f1..39e8fb063026 100755 --- a/test/agentchat/test_groupchat.py +++ b/test/agentchat/test_groupchat.py @@ -12,6 +12,7 @@ import autogen from autogen import Agent, AssistantAgent, GroupChat, GroupChatManager +from autogen.agentchat.contrib.capabilities import transform_messages, transforms from autogen.exception_utils import AgentNameConflict, UndefinedNextAgent @@ -2061,6 +2062,46 @@ def test_manager_resume_messages(): return_agent, return_message = manager.resume(messages="Let's get this conversation started.") +def test_select_speaker_transform_messages(): + """Tests adding transform messages to a GroupChat for speaker selection when in 'auto' mode""" + + # Test adding a TransformMessages to a group chat + test_add_transforms = transform_messages.TransformMessages( + transforms=[ + transforms.MessageHistoryLimiter(max_messages=10), + transforms.MessageTokenLimiter(max_tokens=3000, max_tokens_per_message=500, min_tokens=300), + ] + ) + + coder = AssistantAgent(name="Coder", llm_config=None) + groupchat = GroupChat(messages=[], agents=[coder], select_speaker_transform_messages=test_add_transforms) + + # Ensure the transform have been added to the GroupChat + assert groupchat._speaker_selection_transforms == test_add_transforms + + # Attempt to add a non MessageTransforms object, such as a list of transforms + with pytest.raises(ValueError, match="select_speaker_transform_messages must be None or MessageTransforms."): + groupchat = GroupChat( + messages=[], + agents=[coder], + select_speaker_transform_messages=[transforms.MessageHistoryLimiter(max_messages=10)], + ) + + # Ensure if we don't pass any transforms in, none are on the GroupChat + groupchat_missing = GroupChat(messages=[], agents=[coder]) + + assert groupchat_missing._speaker_selection_transforms is None + + # Ensure we can pass in None + groupchat_none = GroupChat( + messages=[], + agents=[coder], + select_speaker_transform_messages=None, + ) + + assert groupchat_none._speaker_selection_transforms is None + + if __name__ == "__main__": # test_func_call_groupchat() # test_broadcast() @@ -2089,4 +2130,5 @@ def test_manager_resume_messages(): test_manager_resume_functions() # test_manager_resume_returns() # test_manager_resume_messages() + # test_select_speaker_transform_messages() pass diff --git a/website/docs/topics/groupchat/transform_messages_speaker_selection.ipynb b/website/docs/topics/groupchat/transform_messages_speaker_selection.ipynb new file mode 100644 index 000000000000..6e17d0cb94b3 --- /dev/null +++ b/website/docs/topics/groupchat/transform_messages_speaker_selection.ipynb @@ -0,0 +1,246 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Using Transform Messages during Speaker Selection\n", + "\n", + "When using \"auto\" mode for speaker selection in group chats, a nested-chat is used to determine the next speaker. This nested-chat includes all of the group chat's messages and this can result in a lot of content which the LLM needs to process for determining the next speaker. As conversations progress, it can be challenging to keep the context length within the workable window for the LLM. Furthermore, reducing the number of overall tokens will improve inference time and reduce token costs.\n", + "\n", + "Using [Transform Messages](/docs/topics/handling_long_contexts/intro_to_transform_messages) you gain control over which messages are used for speaker selection and the context length within each message as well as overall.\n", + "\n", + "All the transforms available for Transform Messages can be applied to the speaker selection nested-chat, such as the `MessageHistoryLimiter`, `MessageTokenLimiter`, and `TextMessageCompressor`.\n", + "\n", + "## How do I apply them\n", + "\n", + "When instantiating your `GroupChat` object, all you need to do is assign a [TransformMessages](/docs/reference/agentchat/contrib/capabilities/transform_messages#transformmessages) object to the `select_speaker_transform_messages` parameter, and the transforms within it will be applied to the nested speaker selection chats.\n", + "\n", + "And, as you're passing in a `TransformMessages` object, multiple transforms can be applied to that nested chat.\n", + "\n", + "As part of the nested-chat, an agent called 'checking_agent' is used to direct the LLM on selecting the next speaker. It is preferable to avoid compressing or truncating the content from this agent. How this is done is shown in the second last example." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating transforms for speaker selection in a GroupChat\n", + "\n", + "We will progressively create a `TransformMessage` object to show how you can build up transforms for speaker selection.\n", + "\n", + "Each iteration will replace the previous one, enabling you to use the code in each cell as is.\n", + "\n", + "Importantly, transforms are applied in the order that they are in the transforms list." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "# Start by importing the transform capabilities\n", + "\n", + "import autogen\n", + "from autogen.agentchat.contrib.capabilities import transform_messages, transforms" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "# Limit the number of messages\n", + "\n", + "# Let's start by limiting the number of messages to consider for speaker selection using a\n", + "# MessageHistoryLimiter transform. This example will use the latest 10 messages.\n", + "\n", + "select_speaker_transforms = transform_messages.TransformMessages(\n", + " transforms=[\n", + " transforms.MessageHistoryLimiter(max_messages=10),\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "# Compress messages through an LLM\n", + "\n", + "# An interesting and very powerful method of reducing tokens is by \"compressing\" the text of\n", + "# a message by using an LLM that's specifically designed to do that. The default LLM used for\n", + "# this purpose is LLMLingua (https://github.com/microsoft/LLMLingua) and it aims to reduce the\n", + "# number of tokens without reducing the message's meaning. We use the TextMessageCompressor\n", + "# transform to compress messages.\n", + "\n", + "# There are multiple LLMLingua models available and it defaults to the first version, LLMLingua.\n", + "# This example will show how to use LongLLMLingua which is targeted towards long-context\n", + "# information processing. LLMLingua-2 has been released and you could use that as well.\n", + "\n", + "# Create the compression arguments, which allow us to specify the model and other related\n", + "# parameters, such as whether to use the CPU or GPU.\n", + "select_speaker_compression_args = dict(\n", + " model_name=\"microsoft/llmlingua-2-xlm-roberta-large-meetingbank\", use_llmlingua2=True, device_map=\"cpu\"\n", + ")\n", + "\n", + "# Now we can add the TextMessageCompressor as the second step\n", + "\n", + "# Important notes on the parameters used:\n", + "# min_tokens - will only apply text compression if the message has at least 1,000 tokens\n", + "# cache - enables caching, if a message has previously been compressed it will use the\n", + "# cached version instead of recompressing it (making it much faster)\n", + "# filter_dict - to minimise the chance of compressing key information, we can include or\n", + "# exclude messages based on role and name.\n", + "# Here, we are excluding any 'system' messages as well as any messages from\n", + "# 'ceo' (just for example) and the 'checking_agent', which is an agent in the\n", + "# nested chat speaker selection chat. Change the 'ceo' name or add additional\n", + "# agent names for any agents that have critical content.\n", + "# exclude_filter - As we are setting this to True, the filter will be an exclusion filter.\n", + "\n", + "# Import the cache functionality\n", + "from autogen.cache.in_memory_cache import InMemoryCache\n", + "\n", + "select_speaker_transforms = transform_messages.TransformMessages(\n", + " transforms=[\n", + " transforms.MessageHistoryLimiter(max_messages=10),\n", + " transforms.TextMessageCompressor(\n", + " min_tokens=1000,\n", + " text_compressor=transforms.LLMLingua(select_speaker_compression_args, structured_compression=True),\n", + " cache=InMemoryCache(seed=43),\n", + " filter_dict={\"role\": [\"system\"], \"name\": [\"ceo\", \"checking_agent\"]},\n", + " exclude_filter=True,\n", + " ),\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "# Limit the total number of tokens and tokens per message\n", + "\n", + "# As a final example, we can manage the total tokens and individual message tokens. We have added a\n", + "# MessageTokenLimiter transform that will limit the total number of tokens for the messages to\n", + "# 3,000 with a maximum of 500 per individual message. Additionally, if a message is less than 300\n", + "# tokens it will not be truncated.\n", + "\n", + "select_speaker_compression_args = dict(\n", + " model_name=\"microsoft/llmlingua-2-xlm-roberta-large-meetingbank\", use_llmlingua2=True, device_map=\"cpu\"\n", + ")\n", + "\n", + "select_speaker_transforms = transform_messages.TransformMessages(\n", + " transforms=[\n", + " transforms.MessageHistoryLimiter(max_messages=10),\n", + " transforms.TextMessageCompressor(\n", + " min_tokens=1000,\n", + " text_compressor=transforms.LLMLingua(select_speaker_compression_args, structured_compression=True),\n", + " cache=InMemoryCache(seed=43),\n", + " filter_dict={\"role\": [\"system\"], \"name\": [\"ceo\", \"checking_agent\"]},\n", + " exclude_filter=True,\n", + " ),\n", + " transforms.MessageTokenLimiter(max_tokens=3000, max_tokens_per_message=500, min_tokens=300),\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "# Now, we apply the transforms to a group chat. We do this by assigning the message\n", + "# transforms from above to the `select_speaker_transform_messages` parameter on the GroupChat.\n", + "\n", + "import os\n", + "\n", + "llm_config = {\n", + " \"config_list\": [{\"model\": \"gpt-4\", \"api_key\": os.environ[\"OPENAI_API_KEY\"]}],\n", + "}\n", + "\n", + "# Define your agents\n", + "chief_executive_officer = autogen.ConversableAgent(\n", + " \"ceo\",\n", + " llm_config=llm_config,\n", + " max_consecutive_auto_reply=1,\n", + " system_message=\"You are leading this group chat, and the business, as the chief executive officer.\",\n", + ")\n", + "\n", + "general_manager = autogen.ConversableAgent(\n", + " \"gm\",\n", + " llm_config=llm_config,\n", + " max_consecutive_auto_reply=1,\n", + " system_message=\"You are the general manager of the business, running the day-to-day operations.\",\n", + ")\n", + "\n", + "financial_controller = autogen.ConversableAgent(\n", + " \"fin_controller\",\n", + " llm_config=llm_config,\n", + " max_consecutive_auto_reply=1,\n", + " system_message=\"You are the financial controller, ensuring all financial matters are managed accordingly.\",\n", + ")\n", + "\n", + "your_group_chat = autogen.GroupChat(\n", + " agents=[chief_executive_officer, general_manager, financial_controller],\n", + " select_speaker_transform_messages=select_speaker_transforms,\n", + ")" + ] + } + ], + "metadata": { + "front_matter": { + "description": "Custom Speaker Selection Function", + "tags": [ + "orchestration", + "group chat" + ] + }, + "kernelspec": { + "display_name": "autogen", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}