diff --git a/camel/functions/__init__.py b/camel/functions/__init__.py index 246fa7420c..0b9cf7cd71 100644 --- a/camel/functions/__init__.py +++ b/camel/functions/__init__.py @@ -20,7 +20,7 @@ from .math_functions import MATH_FUNCS from .search_functions import SEARCH_FUNCS from .weather_functions import WEATHER_FUNCS -from .unstructured_io_fuctions import UnstructuredModules +from ..loaders.unstructured_io import UnstructuredIO __all__ = [ 'OpenAIFunction', @@ -29,5 +29,5 @@ 'MATH_FUNCS', 'SEARCH_FUNCS', 'WEATHER_FUNCS', - 'UnstructuredModules', + 'UnstructuredIO', ] diff --git a/camel/loaders/__init__.py b/camel/loaders/__init__.py new file mode 100644 index 0000000000..ee476ee288 --- /dev/null +++ b/camel/loaders/__init__.py @@ -0,0 +1,22 @@ +# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. =========== +# Licensed under the Apache License, Version 2.0 (the “License”); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an “AS IS” BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. =========== + +from .base_io import File, read_file +from .unstructured_io import UnstructuredIO + +__all__ = [ + 'File', + 'read_file', + 'UnstructuredIO', +] diff --git a/camel/functions/base_io_functions.py b/camel/loaders/base_io.py similarity index 89% rename from camel/functions/base_io_functions.py rename to camel/loaders/base_io.py index dbe5994890..1d6ca3bc78 100644 --- a/camel/functions/base_io_functions.py +++ b/camel/loaders/base_io.py @@ -35,10 +35,10 @@ def __init__( Args: name (str): The name of the file. id (str): The unique identifier of the file. - metadata (Dict[str, Any], optional): - Additional metadata associated with the file. Defaults to None. - docs (List[Dict[str, Any]], optional): - A list of documents contained within the file. Defaults to None. + metadata (Dict[str, Any], optional): Additional metadata + associated with the file. Defaults to None. + docs (List[Dict[str, Any]], optional): A list of documents + contained within the file. Defaults to None. """ self.name = name self.id = id @@ -51,8 +51,8 @@ def from_bytes(cls, file: BytesIO) -> "File": r"""Creates a File object from a BytesIO object. Args: - file (BytesIO): - A BytesIO object representing the contents of the file. + file (BytesIO): A BytesIO object representing the contents of the + file. Returns: File: A File object. @@ -96,8 +96,8 @@ def from_bytes(cls, file: BytesIO) -> "DocxFile": r"""Creates a DocxFile object from a BytesIO object. Args: - file (BytesIO): - A BytesIO object representing the contents of the docx file. + file (BytesIO): A BytesIO object representing the contents of the + docx file. Returns: DocxFile: A DocxFile object. @@ -127,8 +127,8 @@ def from_bytes(cls, file: BytesIO) -> "PdfFile": r"""Creates a PdfFile object from a BytesIO object. Args: - file (BytesIO): - A BytesIO object representing the contents of the pdf file. + file (BytesIO): A BytesIO object representing the contents of the + pdf file. Returns: PdfFile: A PdfFile object. @@ -162,8 +162,8 @@ def from_bytes(cls, file: BytesIO) -> "TxtFile": r"""Creates a TxtFile object from a BytesIO object. 
Args: - file (BytesIO): - A BytesIO object representing the contents of the txt file. + file (BytesIO): A BytesIO object representing the contents of the + txt file. Returns: TxtFile: A TxtFile object. @@ -187,8 +187,8 @@ def from_bytes(cls, file: BytesIO) -> "JsonFile": r"""Creates a JsonFile object from a BytesIO object. Args: - file (BytesIO): - A BytesIO object representing the contents of the json file. + file (BytesIO): A BytesIO object representing the contents of the + json file. Returns: JsonFile: A JsonFile object. @@ -211,8 +211,8 @@ def from_bytes(cls, file: BytesIO) -> "HtmlFile": r"""Creates a HtmlFile object from a BytesIO object. Args: - file (BytesIO): - A BytesIO object representing the contents of the html file. + file (BytesIO): A BytesIO object representing the contents of the + html file. Returns: HtmlFile: A HtmlFile object. diff --git a/camel/functions/unstructured_io_fuctions.py b/camel/loaders/unstructured_io.py similarity index 88% rename from camel/functions/unstructured_io_fuctions.py rename to camel/loaders/unstructured_io.py index 9eb72d68a1..2be9c25c7c 100644 --- a/camel/functions/unstructured_io_fuctions.py +++ b/camel/loaders/unstructured_io.py @@ -15,7 +15,7 @@ from typing import Any, Dict, List, Literal, Optional, Tuple, Union -class UnstructuredModules: +class UnstructuredIO: r"""A class to handle various functionalities provided by the Unstructured library, including version checking, parsing, cleaning, extracting, staging, chunking data, and integrating with cloud @@ -29,13 +29,13 @@ class UnstructuredModules: UNSTRUCTURED_MIN_VERSION = "0.10.30" # Define the minimum version def __init__(self): - r"""Initializes the UnstructuredModules class and ensures the + r"""Initializes the UnstructuredIO class and ensures the installed version of Unstructured library meets the minimum requirements. """ - self.ensure_unstructured_version(self.UNSTRUCTURED_MIN_VERSION) + self._ensure_unstructured_version(self.UNSTRUCTURED_MIN_VERSION) - def ensure_unstructured_version(self, min_version: str) -> None: + def _ensure_unstructured_version(self, min_version: str) -> None: r"""Validates that the installed 'Unstructured' library version satisfies the specified minimum version requirement. This function is essential for ensuring compatibility with features that depend on a @@ -80,7 +80,7 @@ def parse_file_or_url( Args: input_path (str): Path to the file or URL to be parsed. - **kwargs Extra kwargs passed to the partition function + **kwargs: Extra kwargs passed to the partition function. Returns: List[Any]: The elements after parsing the file or URL, could be a @@ -147,10 +147,10 @@ def clean_text_data( clean_options: Optional[List[Tuple[str, Dict[str, Any]]]] = None, ) -> str: r"""Cleans text data using a variety of cleaning functions provided by - the `'unstructured'` library. + the `unstructured` library. This function applies multiple text cleaning utilities by calling the - `'unstructured'` library's cleaning bricks for operations like + `unstructured` library's cleaning bricks for operations like replacing unicode quotes, removing extra whitespace, dashes, non-ascii characters, and more. @@ -161,36 +161,27 @@ def clean_text_data( Args: text (str): The text to be cleaned. - clean_options (dict): A dictionary specifying which - cleaning options to apply. The keys should - match the names of the cleaning functions, - and the values should be dictionaries - containing the parameters for each - function. 
Supported types: - 'clean_extra_whitespace', - 'clean_bullets', - 'clean_ordered_bullets', - 'clean_postfix', - 'clean_prefix', - 'clean_dashes', - 'clean_trailing_punctuation', - 'clean_non_ascii_chars', - 'group_broken_paragraphs', - 'remove_punctuation', - 'replace_unicode_quotes', - 'bytes_string_to_string', - 'translate_text'. + clean_options (dict): A dictionary specifying which cleaning + options to apply. The keys should match the names of the + cleaning functions, and the values should be dictionaries + containing the parameters for each function. Supported types: + 'clean_extra_whitespace', 'clean_bullets', + 'clean_ordered_bullets', 'clean_postfix', 'clean_prefix', + 'clean_dashes', 'clean_trailing_punctuation', + 'clean_non_ascii_chars', 'group_broken_paragraphs', + 'remove_punctuation', 'replace_unicode_quotes', + 'bytes_string_to_string', 'translate_text'. Returns: str: The cleaned text. Raises: AttributeError: If a cleaning option does not correspond to a - valid cleaning function in 'unstructured'. + valid cleaning function in `unstructured`. Notes: The 'options' dictionary keys must correspond to valid cleaning - brick names from the 'unstructured' library. + brick names from the `unstructured` library. Each brick's parameters must be provided in a nested dictionary as the value for the key. @@ -246,7 +237,7 @@ def clean_text_data( **params) else: raise ValueError( - f"'{func_name}' is not a valid function in 'unstructured'." + f"'{func_name}' is not a valid function in `unstructured`." ) return cleaned_text @@ -267,15 +258,11 @@ def extract_data_from_text( Args: text (str): Text to extract data from. extract_type (Literal['extract_datetimetz', - 'extract_email_address', - 'extract_ip_address', - 'extract_ip_address_name', - 'extract_mapi_id', - 'extract_ordered_bullets', - 'extract_text_after', - 'extract_text_before', - 'extract_us_phone_number']): - Type of data to extract. + 'extract_email_address', 'extract_ip_address', + 'extract_ip_address_name', 'extract_mapi_id', + 'extract_ordered_bullets', 'extract_text_after', + 'extract_text_before', 'extract_us_phone_number']): Type of + data to extract. **kwargs: Additional keyword arguments for specific extraction functions. @@ -337,18 +324,12 @@ def stage_elements( Args: elements (List[Any]): List of Element objects to be staged. - stage_type (Literal['convert_to_csv', - 'convert_to_dataframe', - 'convert_to_dict', - 'dict_to_elements', - 'stage_csv_for_prodigy', - 'stage_for_prodigy', - 'stage_for_baseplate', - 'stage_for_datasaur', - 'stage_for_label_box', - 'stage_for_label_studio', - 'stage_for_weaviate']): - Type of staging to perform. + stage_type (Literal['convert_to_csv', 'convert_to_dataframe', + 'convert_to_dict', 'dict_to_elements', + 'stage_csv_for_prodigy', 'stage_for_prodigy', + 'stage_for_baseplate', 'stage_for_datasaur', + 'stage_for_label_box', 'stage_for_label_studio', + 'stage_for_weaviate']): Type of staging to perform. **kwargs: Additional keyword arguments specific to the staging type. @@ -413,7 +394,7 @@ def chunk_elements(self, elements: List[Any], chunk_type: str, Args: elements (List[Any]): List of Element objects to be chunked. chunk_type (str): Type chunk going to apply. Supported types: - 'chunk_by_title'. + 'chunk_by_title'. **kwargs: Additional keyword arguments for chunking. 
Returns: diff --git a/camel/retrievers/vector_retriever.py b/camel/retrievers/vector_retriever.py index 4646b15f3e..7c30377376 100644 --- a/camel/retrievers/vector_retriever.py +++ b/camel/retrievers/vector_retriever.py @@ -14,7 +14,7 @@ from typing import Any, Dict, List, Optional from camel.embeddings import BaseEmbedding, OpenAIEmbedding -from camel.functions import UnstructuredModules +from camel.functions import UnstructuredIO from camel.retrievers import BaseRetriever from camel.storages import BaseVectorStorage, VectorDBQuery, VectorRecord @@ -58,7 +58,7 @@ def process( # type: ignore embeddings. **kwargs (Any): Additional keyword arguments for elements chunking. """ - unstructured_modules = UnstructuredModules() + unstructured_modules = UnstructuredIO() elements = unstructured_modules.parse_file_or_url(content_input_path) chunks = unstructured_modules.chunk_elements( chunk_type="chunk_by_title", elements=elements, **kwargs) diff --git a/camel/utils/__init__.py b/camel/utils/__init__.py index f7b59aa5c2..f39184095a 100644 --- a/camel/utils/__init__.py +++ b/camel/utils/__init__.py @@ -19,6 +19,7 @@ download_tasks, get_task_list, check_server_running, + role_playing_with_function, get_system_information, to_pascal, PYDANTIC_V2, @@ -45,4 +46,5 @@ 'BaseTokenCounter', 'OpenAITokenCounter', 'OpenSourceTokenCounter', + 'role_playing_with_function', ] diff --git a/camel/utils/commons.py b/camel/utils/commons.py index bec907eda9..26c2430337 100644 --- a/camel/utils/commons.py +++ b/camel/utils/commons.py @@ -29,6 +29,18 @@ F = TypeVar('F', bound=Callable[..., Any]) +# Lazy import to avoid loading camel.functions at module import time +def get_lazy_imported_functions_module(): + from camel.functions import MATH_FUNCS, SEARCH_FUNCS, WEATHER_FUNCS + return [*MATH_FUNCS, *SEARCH_FUNCS, *WEATHER_FUNCS] + + +# Lazy import to avoid loading camel.types at module import time +def get_lazy_imported_types_module(): + from camel.types import ModelType + return ModelType.GPT_4_TURBO + + def openai_api_key_required(func: F) -> F: r"""Decorator that checks if the OpenAI API key is available in the environment variables. @@ -226,3 +238,131 @@ def to_pascal(snake: str) -> str: PYDANTIC_V2 = pydantic.VERSION.startswith("2.") + + +def role_playing_with_function( + task_prompt: str = ("Assume now is 2024 in the Gregorian calendar, " + "estimate the current age of University of Oxford " + "and then add 10 more years to this age, " + "and get the current weather of the city where " + "the University is located."), + function_list: Optional[List] = None, + model_type=None, + chat_turn_limit=10, + assistant_role_name: str = "Searcher", + user_role_name: str = "Professor", +) -> None: + r"""Initializes and conducts a `RolePlaying` session with + `FunctionCallingConfig`. The function creates an interactive and dynamic + role-play session where the AI Assistant and User engage based on the + given task, roles, and available functions. It demonstrates the + versatility of AI in handling diverse tasks and user interactions + within a structured `RolePlaying` framework. + + Args: + task_prompt (str): The initial task or scenario description to start + the `RolePlaying` session. Defaults to a prompt involving the + estimation of the University of Oxford's age and weather information. + function_list (list): A list of functions that the agent can utilize + during the session. Defaults to a combination of math, search, and + weather functions. + model_type (ModelType): The type of chatbot model used for both the + assistant and the user. Defaults to `GPT-4 Turbo`. 
+ chat_turn_limit (int): The maximum number of turns (exchanges) in the + chat session. Defaults to 10. + assistant_role_name (str): The role name assigned to the AI Assistant. + Defaults to 'Searcher'. + user_role_name (str): The role name assigned to the User. Defaults to + 'Professor'. + + Returns: + None: This function does not return any value but prints out the + session's dialogues and outputs. + """ + + # Run lazy import + if function_list is None: + function_list = get_lazy_imported_functions_module() + if model_type is None: + model_type = get_lazy_imported_types_module() + + from colorama import Fore + + from camel.agents.chat_agent import FunctionCallingRecord + from camel.configs import ChatGPTConfig, FunctionCallingConfig + from camel.societies import RolePlaying + + task_prompt = task_prompt + + user_model_config = ChatGPTConfig(temperature=0.0) + + function_list = function_list + assistant_model_config = FunctionCallingConfig.from_openai_function_list( + function_list=function_list, + kwargs=dict(temperature=0.0), + ) + + role_play_session = RolePlaying( + assistant_role_name=assistant_role_name, + user_role_name=user_role_name, + assistant_agent_kwargs=dict( + model_type=model_type, + model_config=assistant_model_config, + function_list=function_list, + ), + user_agent_kwargs=dict( + model_type=model_type, + model_config=user_model_config, + ), + task_prompt=task_prompt, + with_task_specify=False, + ) + + print( + Fore.GREEN + + f"AI Assistant sys message:\n{role_play_session.assistant_sys_msg}\n") + print(Fore.BLUE + + f"AI User sys message:\n{role_play_session.user_sys_msg}\n") + + print(Fore.YELLOW + f"Original task prompt:\n{task_prompt}\n") + print( + Fore.CYAN + + f"Specified task prompt:\n{role_play_session.specified_task_prompt}\n") + print(Fore.RED + f"Final task prompt:\n{role_play_session.task_prompt}\n") + + n = 0 + input_msg = role_play_session.init_chat() + while n < chat_turn_limit: + n += 1 + assistant_response, user_response = role_play_session.step(input_msg) + + if assistant_response.terminated: + print(Fore.GREEN + + ("AI Assistant terminated. Reason: " + f"{assistant_response.info['termination_reasons']}.")) + break + if user_response.terminated: + print(Fore.GREEN + + ("AI User terminated. " + f"Reason: {user_response.info['termination_reasons']}.")) + break + + # Print output from the user + print_text_animated(Fore.BLUE + + f"AI User:\n\n{user_response.msg.content}\n") + + # Print output from the assistant, including any function + # execution information + print_text_animated(Fore.GREEN + "AI Assistant:") + + called_functions: List[ + FunctionCallingRecord] = assistant_response.info[ + 'called_functions'] + for func_record in called_functions: + print_text_animated(f"{func_record}") + print_text_animated(f"{assistant_response.msg.content}\n") + + if "CAMEL_TASK_DONE" in user_response.msg.content: + break + + input_msg = assistant_response.msg diff --git a/docs/tutorials_and_cookbooks/rag_cookbook.ipynb b/docs/tutorials_and_cookbooks/rag_cookbook.ipynb new file mode 100644 index 0000000000..c50162f0a0 --- /dev/null +++ b/docs/tutorials_and_cookbooks/rag_cookbook.ipynb @@ -0,0 +1,553 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# RAG Cookbook\n", + "\n", + "In this notebook, we show the usage of the CAMEL Retriever Module in both a customized way and an auto way. 
We will also show how to combine `AutoRetriever` with `ChatAgent`, and further combine `AutoRetriever` with `RolePlaying` by using `Function Calling`.\n", + "\n", + "Four main parts are included:\n", + "- Customized RAG\n", + "- Auto RAG\n", + "- Single Agent with Auto RAG\n", + "- Role-playing with Auto RAG" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load Data\n", + "\n", + "Let's first load the CAMEL paper from https://arxiv.org/pdf/2303.17760.pdf. This will be our local example data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import requests\n", + "\n", + "os.makedirs('local_data', exist_ok=True)\n", + "\n", + "url = \"https://arxiv.org/pdf/2303.17760.pdf\"\n", + "response = requests.get(url)\n", + "with open('local_data/camel paper.pdf', 'wb') as file:\n", + " file.write(response.content)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Customized RAG\n", + "In this section we will set up our customized RAG pipeline, taking `VectorRetriever` as an example.\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set the embedding model. We will use `OpenAIEmbedding` as the embedding model, so we need to set the `OPENAI_API_KEY` below." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = \"Your Key\"" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Import and set the embedding instance:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from camel.embeddings import OpenAIEmbedding\n", + "\n", + "embedding_instance = OpenAIEmbedding()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Import and set the vector storage instance:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from camel.storages import MilvusStorage\n", + "\n", + "storage_instance = MilvusStorage(\n", + " vector_dim=embedding_instance.get_output_dim(),\n", + " url_and_api_key=(\"Your Milvus URI\",\"Your Milvus Token\"),\n", + " collection_name=\"camel_paper\",\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Import and set the retriever instance:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from camel.retrievers import VectorRetriever\n", + "\n", + "vector_retriever = VectorRetriever(embedding_model=embedding_instance)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We use the integrated `Unstructured Module` to split the content into small chunks; the content will be split automatically with its `chunk_by_title` function. The maximum length for each chunk is 500 characters, which is a suitable length for `OpenAIEmbedding`. All the text in the chunks will be embedded and stored in the vector storage instance. This will take some time, so please wait."
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "vector_retriever.process(\n", + " content_input_path=\"local_data/camel paper.pdf\",\n", + " storage=storage_instance,\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can retrieve information from the vector storage by giving a query. By default it will give you back the text content of the top-1 chunk with the highest cosine similarity score, and the similarity score must be higher than 0.75 to ensure the retrieved content is relevant to the query. You can also change the `top_k` and `similarity_threshold` values to fit your needs.\n", + "\n", + "The returned string list includes:\n", + "- similarity score\n", + "- content path\n", + "- metadata\n", + "- text" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{'similarity score': '0.8321741223335266', 'content path': 'local_data/camel paper.pdf', 'metadata': {'filetype': 'application/pdf', 'languages': ['eng'], 'last_modified': '2024-03-24T17:58:24', 'page_number': 45}, 'text': 'CAMEL Data and Code License The intended purpose and licensing of CAMEL is solely for research use. The source code is licensed under Apache 2.0. The datasets are licensed under CC BY NC 4.0, which permits only non-commercial usage. It is advised that any models trained using the dataset should not be utilized for anything other than research purposes.\\n\\n45'}]\n" + ] + } + ], + "source": [ + "retrieved_info = vector_retriever.query(\n", + " query=\"What is CAMEL?\", storage=storage_instance, top_k=1\n", + ")\n", + "print(retrieved_info)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's try an irrelevant query:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{'text': 'No suitable information retrieved from local_data/camel paper.pdf with similarity_threshold = 0.75.'}]\n" + ] + } + ], + "source": [ + "retrieved_info_irrelevant = vector_retriever.query(\n", + " query=\"Compared with dumpling and rice, which should I take for dinner?\",\n", + " storage=storage_instance,\n", + " top_k=1,\n", + ")\n", + "\n", + "print(retrieved_info_irrelevant)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Auto RAG\n", + "In this section we will run the `AutoRetriever` with default settings. It uses `OpenAIEmbedding` as the default embedding model and `Milvus` as the default vector storage.\n", + "\n", + "What you need to do is:\n", + "- Set content input paths, which can be local paths or remote URLs\n", + "- Set the remote URL and API key for Milvus\n", + "- Give a query\n", + "\n", + "The Auto RAG pipeline will create collections for the given content input paths; the collection name is set automatically based on the content input path name. If the collection already exists, it will run the retrieval directly."
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Original Query:\n", + "{What is CAMEL-AI}\n", + "Retrieved Context:\n", + "{'similarity score': '0.8369356393814087', 'content path': 'local_data/camel paper.pdf', 'metadata': {'filetype': 'application/pdf', 'languages': ['eng'], 'last_modified': '2024-03-24T17:58:24', 'page_number': 7}, 'text': 'Section 3.2, to simulate assistant-user cooperation. For our analysis, we set our attention on AI Society setting. We also gathered conversational data, named CAMEL AI Society and CAMEL Code datasets and problem-solution pairs data named CAMEL Math and CAMEL Science and analyzed and evaluated their quality. Moreover, we will discuss potential extensions of our framework and highlight both the risks and opportunities that future AI society might present.'}\n", + "{'similarity score': '0.8378663659095764', 'content path': 'https://www.camel-ai.org/', 'metadata': {'emphasized_text_contents': ['Mission', 'CAMEL-AI.org', 'is an open-source community dedicated to the study of autonomous and communicative agents. We believe that studying these agents on a large scale offers valuable insights into their behaviors, capabilities, and potential risks. To facilitate research in this field, we provide, implement, and support various types of agents, tasks, prompts, models, datasets, and simulated environments.', 'Join us via', 'Slack', 'Discord', 'or'], 'emphasized_text_tags': ['span', 'span', 'span', 'span', 'span', 'span', 'span'], 'filetype': 'text/html', 'languages': ['eng'], 'link_texts': [None, None, None], 'link_urls': ['#h.3f4tphhd9pn8', 'https://join.slack.com/t/camel-ai/shared_invite/zt-1vy8u9lbo-ZQmhIAyWSEfSwLCl2r2eKA', 'https://discord.gg/CNcNpquyDc'], 'page_number': 1, 'url': 'https://www.camel-ai.org/'}, 'text': 'Mission\\n\\nCAMEL-AI.org is an open-source community dedicated to the study of autonomous and communicative agents. We believe that studying these agents on a large scale offers valuable insights into their behaviors, capabilities, and potential risks. To facilitate research in this field, we provide, implement, and support various types of agents, tasks, prompts, models, datasets, and simulated environments.\\n\\nJoin us via\\n\\nSlack\\n\\nDiscord\\n\\nor'}\n" + ] + } + ], + "source": [ + "from camel.retrievers import AutoRetriever\n", + "from camel.types import StorageType\n", + "\n", + "auto_retriever = AutoRetriever(\n", + " url_and_api_key=(\"Your Milvus URI\",\"Your Milvus Token\"),\n", + " storage_type=StorageType.MILVUS,\n", + " embedding_model=embedding_instance)\n", + "\n", + "retrieved_info = auto_retriever.run_vector_retriever(\n", + " query=\"What is CAMEL-AI\",\n", + " content_input_paths=[\n", + " \"local_data/camel paper.pdf\", # example local path\n", + " \"https://www.camel-ai.org/\", # example remote url\n", + " ],\n", + " top_k=1,\n", + " return_detailed_info=True,\n", + ")\n", + "\n", + "print(retrieved_info)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Single Agent with Auto RAG\n", + "In this section we will show how to combine the `AutoRetriever` with a single `ChatAgent`." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's define an agent function. With this function, we can get a response from the agent by providing a query."
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CAMEL-AI is an open-source community dedicated to the study of autonomous and communicative agents. It provides, implements, and supports various types of agents, tasks, prompts, models, datasets, and simulated environments to facilitate research in this field.\n" + ] + } + ], + "source": [ + "from camel.agents import ChatAgent\n", + "from camel.messages import BaseMessage\n", + "from camel.types import RoleType\n", + "from camel.retrievers import AutoRetriever\n", + "from camel.types import StorageType\n", + "\n", + "def single_agent(query: str) -> str:\n", + " # Set agent role\n", + " assistant_sys_msg = BaseMessage(\n", + " role_name=\"Assistant\",\n", + " role_type=RoleType.ASSISTANT,\n", + " meta_dict=None,\n", + " content=\"You are a helpful assistant to answer questions. I will give you the Original Query and Retrieved Context; answer the Original Query based on the Retrieved Context. If you can't answer the question, just say I don't know.\",\n", + " )\n", + "\n", + " # Add auto retriever\n", + " auto_retriever = AutoRetriever(\n", + " url_and_api_key=(\"Your Milvus URI\",\"Your Milvus Token\"),\n", + " storage_type=StorageType.MILVUS,\n", + " embedding_model=embedding_instance)\n", + "\n", + " retrieved_info = auto_retriever.run_vector_retriever(\n", + " query=query,\n", + " content_input_paths=[\n", + " \"local_data/camel paper.pdf\", # example local path\n", + " \"https://www.camel-ai.org/\", # example remote url\n", + " ],\n", + " # vector_storage_local_path=\"storage_default_run\",\n", + " top_k=1,\n", + " return_detailed_info=True,\n", + " )\n", + "\n", + " # Pass the retrieved information to the agent\n", + " user_msg = BaseMessage.make_user_message(role_name=\"User\", content=retrieved_info)\n", + " agent = ChatAgent(assistant_sys_msg)\n", + "\n", + " # Get response\n", + " assistant_response = agent.step(user_msg)\n", + " return assistant_response.msg.content\n", + "\n", + "print(single_agent(\"What is CAMEL-AI\"))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Role-playing with Auto RAG\n", + "In this section we will show how to combine the `AutoRetriever` with `RolePlaying` by applying `Function Calling`." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, we need to define a retriever function with a well-written docstring so the LLM can understand what this function is used for. The main code is the same as in the Auto RAG section." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import List\n", + "from camel.functions import OpenAIFunction\n", + "from camel.retrievers import AutoRetriever\n", + "from camel.types import StorageType\n", + "\n", + "def local_retriever(query: str) -> str:\n", + " r\"\"\"Performs auto local retrieval for information. Given a query,\n", + " this function will retrieve the information from the local vector storage,\n", + " and return the retrieved information back. 
It is useful for information\n", + " retrieval.\n", + "\n", + " Args:\n", + " query (str): Question you want to be answered.\n", + "\n", + " Returns:\n", + " str: Aggregated information retrieved in response to the query.\n", + "\n", + " Example:\n", + " local_retriever(query=\"what is camel?\")\n", + " \"\"\"\n", + " auto_retriever = AutoRetriever(\n", + " url_and_api_key=(\"Your Milvus URI\",\"Your Milvus Token\"),\n", + " storage_type=StorageType.MILVUS,\n", + " embedding_model=embedding_instance)\n", + "\n", + " retrieved_info = auto_retriever.run_vector_retriever(\n", + " query=query,\n", + " content_input_paths=[\n", + " \"local_data/camel paper.pdf\", # example local path\n", + " ],\n", + " top_k=1\n", + " )\n", + " return retrieved_info\n", + "\n", + "# Add the function to the OpenAIFunction list\n", + "RETRIEVER_FUNCS: List[OpenAIFunction] = [\n", + " OpenAIFunction(func)\n", + " for func in [\n", + " local_retriever,\n", + " ]\n", + "]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Run the role-playing with the defined retriever function:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32mAI Assistant sys message:\n", + "BaseMessage(role_name='Searcher', role_type=<RoleType.ASSISTANT: 'assistant'>, meta_dict={'task': \"What is the main termination reasons for AI Society dataset, how many number of messages did camel decided to limit, what's the value plus 100?\", 'assistant_role': 'Searcher', 'user_role': 'Professor'}, content=\"===== RULES OF ASSISTANT =====\\nNever forget you are a Searcher and I am a Professor. Never flip roles! Never instruct me!\\nWe share a common interest in collaborating to successfully complete a task.\\nYou must help me to complete the task.\\nHere is the task: What is the main termination reasons for AI Society dataset, how many number of messages did camel decided to limit, what's the value plus 100?. Never forget our task!\\nI must instruct you based on your expertise and my needs to complete the task.\\n\\nI must give you one instruction at a time.\\nYou must write a specific solution that appropriately solves the requested instruction and explain your solutions.\\nYou must decline my instruction honestly if you cannot perform the instruction due to physical, moral, legal reasons or your capability and explain the reasons.\\nUnless I say the task is completed, you should always start with:\\n\\nSolution: <YOUR_SOLUTION>\\n\\n<YOUR_SOLUTION> should be very specific, include detailed explanations and provide preferable detailed implementations and examples and lists for task-solving.\\nAlways end with: Next request.\")\n", + "\n", + "\u001b[34mAI User sys message:\n", + "BaseMessage(role_name='Professor', role_type=<RoleType.USER: 'user'>, meta_dict={'task': \"What is the main termination reasons for AI Society dataset, how many number of messages did camel decided to limit, what's the value plus 100?\", 'assistant_role': 'Searcher', 'user_role': 'Professor'}, content='===== RULES OF USER =====\\nNever forget you are a Professor and I am a Searcher. Never flip roles! You will always instruct me.\\nWe share a common interest in collaborating to successfully complete a task.\\nI must help you to complete the task.\\nHere is the task: What is the main termination reasons for AI Society dataset, how many number of messages did camel decided to limit, what\\'s the value plus 100?. 
Never forget our task!\\nYou must instruct me based on my expertise and your needs to solve the task ONLY in the following two ways:\\n\\n1. Instruct with a necessary input:\\nInstruction: <YOUR_INSTRUCTION>\\nInput: <YOUR_INPUT>\\n\\n2. Instruct without any input:\\nInstruction: <YOUR_INSTRUCTION>\\nInput: None\\n\\nThe \"Instruction\" describes a task or question. The paired \"Input\" provides further context or information for the requested \"Instruction\".\\n\\nYou must give me one instruction at a time.\\nI must write a response that appropriately solves the requested instruction.\\nI must decline your instruction honestly if I cannot perform the instruction due to physical, moral, legal reasons or my capability and explain the reasons.\\nYou should instruct me not ask me questions.\\nNow you must start to instruct me using the two ways described above.\\nDo not add anything else other than your instruction and the optional corresponding input!\\nKeep giving me instructions and necessary inputs until you think the task is completed.\\nWhen the task is completed, you must only reply with a single word <CAMEL_TASK_DONE>.\\nNever say <CAMEL_TASK_DONE> unless my responses have solved your task.')\n", + "\n", + "\u001b[33mOriginal task prompt:\n", + "What is the main termination reasons for AI Society dataset, how many number of messages did camel decided to limit, what's the value plus 100?\n", + "\n", + "\u001b[36mSpecified task prompt:\n", + "None\n", + "\n", + "\u001b[31mFinal task prompt:\n", + "What is the main termination reasons for AI Society dataset, how many number of messages did camel decided to limit, what's the value plus 100?\n", + "\n", + "\u001b[34mAI User:\n", + "\n", + "Instruction: Provide the main termination reasons from the AI Society dataset.\n", + "Input: None\n", + "\n", + "\n", + "\u001b[32mAI Assistant:\n", + "\n", + "Function Execution: local_retriever\n", + "\tArgs: {'query': 'main termination reasons for AI Society dataset'}\n", + "\tResult: Original Query:\n", + "{main termination reasons for AI Society dataset}\n", + "Retrieved Context:\n", + "Next we examine the conversation termination reasons for both AI Society and Code datasets. As can be seen in Figure 8, the main termination reasons for AI Society dataset is Assistant Instruct whereas for Code it is Token Limit. The latter is expected as the since responses that contain code tend to be long. It is also interesting to note that in both datasets, the termination due to Maximum Number of Messages is low indicating that the limit of 40 maximum messages is reasonable. Our decision\n", + "\n", + "Solution: The main termination reason for the AI Society dataset is \"Assistant Instruct.\" This indicates that the conversations in this dataset typically end when the assistant is instructed to terminate the conversation.\n", + "\n", + "Next request.\n", + "\n", + "\n", + "\u001b[34mAI User:\n", + "\n", + "Instruction: Identify the number of messages that camel decided to limit.\n", + "Input: None\n", + "\n", + "\n", + "\u001b[32mAI Assistant:\n", + "\n", + "Function Execution: local_retriever\n", + "\tArgs: {'query': 'number of messages camel decided to limit'}\n", + "\tResult: Original Query:\n", + "{number of messages camel decided to limit}\n", + "Retrieved Context:\n", + "to limit the number of messages to 40 is also cost-related. Even if we provide a set of termination conditions, we still want to put a safeguard to the maximum limit of the message. It is because after the task is completed the agents will provide short outputs like \"thank you\" and \"welcome\". 
If no safeguard is set and termination fails, the conversation will only end until it exceeds the token limit, which may end up with thousands of API calls and hundreds of USD dollars cost.\n", + "\n", + "Solution: Camel decided to limit the number of messages to 40 as a safeguard to prevent excessive API calls and associated costs.\n", + "\n", + "Next request.\n", + "\n", + "\n", + "\u001b[34mAI User:\n", + "\n", + "Instruction: Calculate the value of the message limit plus 100.\n", + "Input: None\n", + "\n", + "\n", + "\u001b[32mAI Assistant:\n", + "\n", + "Function Execution: add\n", + "\tArgs: {'a': 40, 'b': 100}\n", + "\tResult: 140\n", + "\n", + "Solution: The value of the message limit plus 100 is 140.\n", + "\n", + "Next request.\n", + "\n", + "\n", + "\u001b[34mAI User:\n", + "\n", + "CAMEL_TASK_DONE\n", + "\n", + "\n", + "\u001b[32mAI Assistant:\n", + "\n", + "Solution: Understood, the task is completed.\n", + "\n", + "If you have any more tasks or need further assistance, feel free to provide new instructions.\n", + "\n", + "\n" + ] + } + ], + "source": [ + "from camel.utils import role_playing_with_function\n", + "from camel.functions import MATH_FUNCS # import another function from camel\n", + "\n", + "role_playing_with_function(\n", + " task_prompt=(\n", + " \"What is the main termination reasons for AI Society dataset, how many number of messages did camel decided to limit, what's the value plus 100?\"\n", + " ),\n", + " function_list=[*RETRIEVER_FUNCS, *MATH_FUNCS],\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "camel-ai-gyT5aXID-py3.10", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/function_call/role_playing_with_function.py b/examples/function_call/role_playing_with_function.py index a21c783310..cf3e9ecfcb 100644 --- a/examples/function_call/role_playing_with_function.py +++ b/examples/function_call/role_playing_with_function.py @@ -11,96 +11,14 @@ # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. 
=========== -from typing import List -from colorama import Fore - -from camel.agents.chat_agent import FunctionCallingRecord -from camel.configs import ChatGPTConfig, FunctionCallingConfig -from camel.functions import MATH_FUNCS, SEARCH_FUNCS, WEATHER_FUNCS -from camel.societies import RolePlaying from camel.types import ModelType -from camel.utils import print_text_animated +from camel.utils import role_playing_with_function def main(model_type=ModelType.GPT_4, chat_turn_limit=10) -> None: - task_prompt = ("Assume now is 2024 in the Gregorian calendar, " - "estimate the current age of University of Oxford " - "and then add 10 more years to this age, " - "and get the current weather of the city where " - "the University is located.") - - user_model_config = ChatGPTConfig(temperature=0.0) - - function_list = [*MATH_FUNCS, *SEARCH_FUNCS, *WEATHER_FUNCS] - assistant_model_config = FunctionCallingConfig.from_openai_function_list( - function_list=function_list, - kwargs=dict(temperature=0.0), - ) - - role_play_session = RolePlaying( - assistant_role_name="Searcher", - user_role_name="Professor", - assistant_agent_kwargs=dict( - model_type=model_type, - model_config=assistant_model_config, - function_list=function_list, - ), - user_agent_kwargs=dict( - model_type=model_type, - model_config=user_model_config, - ), - task_prompt=task_prompt, - with_task_specify=False, - ) - - print( - Fore.GREEN + - f"AI Assistant sys message:\n{role_play_session.assistant_sys_msg}\n") - print(Fore.BLUE + - f"AI User sys message:\n{role_play_session.user_sys_msg}\n") - - print(Fore.YELLOW + f"Original task prompt:\n{task_prompt}\n") - print( - Fore.CYAN + - f"Specified task prompt:\n{role_play_session.specified_task_prompt}\n") - print(Fore.RED + f"Final task prompt:\n{role_play_session.task_prompt}\n") - - n = 0 - input_msg = role_play_session.init_chat() - while n < chat_turn_limit: - n += 1 - assistant_response, user_response = role_play_session.step(input_msg) - - if assistant_response.terminated: - print(Fore.GREEN + - ("AI Assistant terminated. Reason: " - f"{assistant_response.info['termination_reasons']}.")) - break - if user_response.terminated: - print(Fore.GREEN + - ("AI User terminated. 
" - f"Reason: {user_response.info['termination_reasons']}.")) - break - - # Print output from the user - print_text_animated(Fore.BLUE + - f"AI User:\n\n{user_response.msg.content}\n") - - # Print output from the assistant, including any function - # execution information - print_text_animated(Fore.GREEN + "AI Assistant:") - called_functions: List[ - FunctionCallingRecord] = assistant_response.info[ - 'called_functions'] - for func_record in called_functions: - print_text_animated(f"{func_record}") - print_text_animated(f"{assistant_response.msg.content}\n") - - if "CAMEL_TASK_DONE" in user_response.msg.content: - break - - input_msg = assistant_response.msg + role_playing_with_function(model_type=model_type, + chat_turn_limit=chat_turn_limit) if __name__ == "__main__": diff --git a/examples/io/unstructured_modules_example.py b/examples/loaders/unstructured_io_example.py similarity index 97% rename from examples/io/unstructured_modules_example.py rename to examples/loaders/unstructured_io_example.py index a1cda0bf70..a070aea1a2 100644 --- a/examples/io/unstructured_modules_example.py +++ b/examples/loaders/unstructured_io_example.py @@ -14,9 +14,9 @@ import os -from camel.functions.unstructured_io_fuctions import UnstructuredModules +from camel.loaders.unstructured_io import UnstructuredIO -unstructured_modules = UnstructuredModules() +unstructured_modules = UnstructuredIO() def parse_file_example(): diff --git a/examples/test/test_unstructured_io_example.py b/examples/test/test_unstructured_io_example.py index 41537c4cb5..0b079f7cc7 100644 --- a/examples/test/test_unstructured_io_example.py +++ b/examples/test/test_unstructured_io_example.py @@ -16,7 +16,7 @@ import pytest -from examples.io.unstructured_modules_example import ( +from examples.loaders.unstructured_io_example import ( chunk_url_content_example, clean_text_example, extract_data_example, diff --git a/test/functions/test_base_data_io_functions.py b/test/loaders/test_base_io.py similarity index 99% rename from test/functions/test_base_data_io_functions.py rename to test/loaders/test_base_io.py index e6a73827a3..6a37d2e557 100644 --- a/test/functions/test_base_data_io_functions.py +++ b/test/loaders/test_base_io.py @@ -16,7 +16,7 @@ import pytest -from camel.functions.base_io_functions import ( +from camel.loaders.base_io import ( DocxFile, File, HtmlFile, diff --git a/test/functions/test_unstructured_io_functions.py b/test/loaders/test_unstructured_io.py similarity index 87% rename from test/functions/test_unstructured_io_functions.py rename to test/loaders/test_unstructured_io.py index e332fbd98d..0854fe55f1 100644 --- a/test/functions/test_unstructured_io_functions.py +++ b/test/loaders/test_unstructured_io.py @@ -15,28 +15,27 @@ import pytest -from camel.functions.unstructured_io_fuctions import UnstructuredModules +from camel.loaders import UnstructuredIO -# Create a fixture to initialize the UnstructuredModules instance +# Create a fixture to initialize the UnstructuredIO instance @pytest.fixture -def unstructured_instance() -> UnstructuredModules: - return UnstructuredModules() +def unstructured_instance() -> UnstructuredIO: + return UnstructuredIO() -# Test the ensure_unstructured_version method -def test_ensure_unstructured_version( - unstructured_instance: UnstructuredModules): +# Test the _ensure_unstructured_version method +def test__ensure_unstructured_version(unstructured_instance: UnstructuredIO): # Test with a valid version - unstructured_instance.ensure_unstructured_version("0.10.30") + 
unstructured_instance._ensure_unstructured_version("0.10.30") # Test with an invalid version (should raise a ValueError) with pytest.raises(ValueError): - unstructured_instance.ensure_unstructured_version("1.0.0") + unstructured_instance._ensure_unstructured_version("1.0.0") # Test the parse_file_or_url method -def test_parse_file_or_url(unstructured_instance: UnstructuredModules): +def test_parse_file_or_url(unstructured_instance: UnstructuredIO): # You can mock the required dependencies and test different scenarios here # Test parsing a valid URL (mock the necessary dependencies) @@ -51,7 +50,7 @@ def test_parse_file_or_url(unstructured_instance: UnstructuredModules): # Test the clean_text_data method -def test_clean_text_data(unstructured_instance: UnstructuredModules): +def test_clean_text_data(unstructured_instance: UnstructuredIO): # Test with a valid cleaning option test_options: List[Tuple[str, Dict[str, @@ -73,7 +72,7 @@ def test_clean_text_data(unstructured_instance: UnstructuredModules): # Test the extract_data_from_text method -def test_extract_data_from_text(unstructured_instance: UnstructuredModules): +def test_extract_data_from_text(unstructured_instance: UnstructuredIO): # Test extracting an email address test_email_text = "Contact me at example@email.com." extracted_email = unstructured_instance.extract_data_from_text( @@ -89,7 +88,7 @@ def test_extract_data_from_text(unstructured_instance: UnstructuredModules): # Test the stage_elements method -def test_stage_elements_for_csv(unstructured_instance: UnstructuredModules): +def test_stage_elements_for_csv(unstructured_instance: UnstructuredIO): # Test staging for baseplate test_url = ( "https://www.cnn.com/2023/01/30/sport/empire-state-building-green-" @@ -126,7 +125,7 @@ def test_stage_elements_for_csv(unstructured_instance: UnstructuredModules): # Test the chunk_elements method -def test_chunk_elements(unstructured_instance: UnstructuredModules): +def test_chunk_elements(unstructured_instance: UnstructuredIO): # Test chunking content from a url test_url = ( "https://www.cnn.com/2023/01/30/sport/empire-state-building-green-" diff --git a/test/retrievers/test_vector_retriever.py b/test/retrievers/test_vector_retriever.py index 8b66e33918..5e940eaa2c 100644 --- a/test/retrievers/test_vector_retriever.py +++ b/test/retrievers/test_vector_retriever.py @@ -21,7 +21,7 @@ # Mock classes for dependencies MockBaseEmbedding = Mock() MockBaseVectorStorage = Mock() -MockUnstructuredModules = Mock() +MockUnstructuredIO = Mock() @pytest.fixture @@ -52,7 +52,7 @@ def test_initialization_with_default_embedding(): # Test process method -@patch('camel.retrievers.vector_retriever.UnstructuredModules') +@patch('camel.retrievers.vector_retriever.UnstructuredIO') def test_process(mock_unstructured_modules, vector_retriever, mock_vector_storage): # Create a mock chunk with metadata
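For quick reference, a minimal usage sketch of the relocated loader under its new import path, mirroring what `VectorRetriever.process` now does internally (parse, then chunk). The file path and sample text below are placeholders, and an installed `unstructured` library satisfying the 0.10.30 minimum is assumed:

    from camel.loaders import UnstructuredIO

    # Instantiation runs the version check; it raises ValueError if the
    # installed `unstructured` library is older than 0.10.30.
    uio = UnstructuredIO()

    # Parse a local file (or a URL) into a list of unstructured elements.
    elements = uio.parse_file_or_url("local_data/camel paper.pdf")

    # Chunk the parsed elements by title, as VectorRetriever.process does.
    chunks = uio.chunk_elements(chunk_type="chunk_by_title", elements=elements)

    # Clean raw text by naming cleaning bricks and their parameters as
    # (function_name, params) tuples from the supported list.
    cleaned = uio.clean_text_data(
        text="“Messy”   text  with   extra whitespace",
        clean_options=[
            ("replace_unicode_quotes", {}),
            ("clean_extra_whitespace", {}),
        ],
    )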