From e6247c791fbd9996c8f163a521a49fc037ce75e1 Mon Sep 17 00:00:00 2001 From: Li Jiang Date: Fri, 16 Aug 2024 00:03:06 +0800 Subject: [PATCH] Remove dependency on RetrieveAssistantAgent for RetrieveChat (#3320) * Remove deps on RetrieveAssistantAgent for getting human input * Terminate when no more context * Add deprecation warning message * Clean up RetrieveAssistantAgent, part 1 * Update version * Clean up docs and notebooks --- .../qdrant_retrieve_user_proxy_agent.py | 6 + .../contrib/retrieve_assistant_agent.py | 6 + .../contrib/retrieve_user_proxy_agent.py | 34 +- autogen/version.py | 2 +- notebook/agentchat_RetrieveChat.ipynb | 54 +- notebook/agentchat_RetrieveChat_mongodb.ipynb | 25 +- .../agentchat_RetrieveChat_pgvector.ipynb | 40 +- notebook/agentchat_RetrieveChat_qdrant.ipynb | 27 +- notebook/agentchat_groupchat_RAG.ipynb | 615 ++++++++++++------ notebook/agentchat_microsoft_fabric.ipynb | 11 +- .../test_pgvector_retrievechat.py | 8 +- .../retrievechat/test_qdrant_retrievechat.py | 5 +- .../contrib/retrievechat/test_retrievechat.py | 6 +- .../blog/2023-10-18-RetrieveChat/index.mdx | 124 +--- .../non-openai-models/cloud-gemini.ipynb | 1 - .../cloud-gemini_vertexai.ipynb | 6 - website/docs/topics/retrieval_augmentation.md | 14 +- 17 files changed, 536 insertions(+), 448 deletions(-) diff --git a/autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py b/autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py index ea81de6dff11..f1cc6947d50e 100644 --- a/autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py +++ b/autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py @@ -1,3 +1,4 @@ +import warnings from typing import Callable, Dict, List, Literal, Optional from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent @@ -93,6 +94,11 @@ def __init__( **kwargs (dict): other kwargs in [UserProxyAgent](../user_proxy_agent#__init__). """ + warnings.warn( + "The QdrantRetrieveUserProxyAgent is deprecated. Please use the RetrieveUserProxyAgent instead, set `vector_db` to `qdrant`.", + DeprecationWarning, + stacklevel=2, + ) super().__init__(name, human_input_mode, is_termination_msg, retrieve_config, **kwargs) self._client = self._retrieve_config.get("client", QdrantClient(":memory:")) self._embedding_model = self._retrieve_config.get("embedding_model", "BAAI/bge-small-en-v1.5") diff --git a/autogen/agentchat/contrib/retrieve_assistant_agent.py b/autogen/agentchat/contrib/retrieve_assistant_agent.py index 9b5ace200dc6..173bc4432e78 100644 --- a/autogen/agentchat/contrib/retrieve_assistant_agent.py +++ b/autogen/agentchat/contrib/retrieve_assistant_agent.py @@ -1,3 +1,4 @@ +import warnings from typing import Any, Dict, List, Optional, Tuple, Union from autogen.agentchat.agent import Agent @@ -16,6 +17,11 @@ class RetrieveAssistantAgent(AssistantAgent): """ def __init__(self, *args, **kwargs): + warnings.warn( + "The RetrieveAssistantAgent is deprecated. Please use the AssistantAgent instead.", + DeprecationWarning, + stacklevel=2, + ) super().__init__(*args, **kwargs) self.register_reply(Agent, RetrieveAssistantAgent._generate_retrieve_assistant_reply) diff --git a/autogen/agentchat/contrib/retrieve_user_proxy_agent.py b/autogen/agentchat/contrib/retrieve_user_proxy_agent.py index 90757af6fc3e..10b70e0e9720 100644 --- a/autogen/agentchat/contrib/retrieve_user_proxy_agent.py +++ b/autogen/agentchat/contrib/retrieve_user_proxy_agent.py @@ -189,7 +189,7 @@ def __init__( interactive retrieval. Default is True. 
- `collection_name` (Optional, str) - the name of the collection. If key not provided, a default name `autogen-docs` will be used. - - `get_or_create` (Optional, bool) - Whether to get the collection if it exists. Default is True. + - `get_or_create` (Optional, bool) - Whether to get the collection if it exists. Default is False. - `overwrite` (Optional, bool) - Whether to overwrite the collection if it exists. Default is False. Case 1. if the collection does not exist, create the collection. Case 2. the collection exists, if overwrite is True, it will overwrite the collection. @@ -306,6 +306,10 @@ def retrieve_docs(self, problem: str, n_results: int = 20, search_string: str = self._db_config["embedding_function"] = self._embedding_function self._vector_db = VectorDBFactory.create_vector_db(db_type=self._vector_db, **self._db_config) self.register_reply(Agent, RetrieveUserProxyAgent._generate_retrieve_user_reply, position=2) + self.register_hook( + hookable_method="process_message_before_send", + hook=self._check_update_context_before_send, + ) def _init_db(self): if not self._vector_db: @@ -400,6 +404,34 @@ def _is_termination_msg_retrievechat(self, message): update_context_case1, update_context_case2 = self._check_update_context(message) return not (contain_code or update_context_case1 or update_context_case2) + def _check_update_context_before_send(self, sender, message, recipient, silent): + if not isinstance(message, (str, dict)): + return message + elif isinstance(message, dict): + msg_text = message.get("content", message) + else: + msg_text = message + + if "UPDATE CONTEXT" == msg_text.strip().upper(): + doc_contents = self._get_context(self._results) + + # Always use self.problem as the query text to retrieve docs, but each time we replace the context with the + # next similar docs in the retrieved doc results. + if not doc_contents: + for _tmp_retrieve_count in range(1, 5): + self._reset(intermediate=True) + self.retrieve_docs( + self.problem, self.n_results * (2 * _tmp_retrieve_count + 1), self._search_string + ) + doc_contents = self._get_context(self._results) + if doc_contents or self.n_results * (2 * _tmp_retrieve_count + 1) >= len(self._results[0]): + break + msg_text = self._generate_message(doc_contents, task=self._task) + + if isinstance(message, dict): + message["content"] = msg_text + return message + @staticmethod def get_max_tokens(model="gpt-3.5-turbo"): if "32k" in model: diff --git a/autogen/version.py b/autogen/version.py index 1ee4e635546e..9b1b78b4b3a0 100644 --- a/autogen/version.py +++ b/autogen/version.py @@ -1 +1 @@ -__version__ = "0.2.34" +__version__ = "0.2.35" diff --git a/notebook/agentchat_RetrieveChat.ipynb b/notebook/agentchat_RetrieveChat.ipynb index 6fefcd3ba44c..eee192c4f826 100644 --- a/notebook/agentchat_RetrieveChat.ipynb +++ b/notebook/agentchat_RetrieveChat.ipynb @@ -10,7 +10,7 @@ "AutoGen offers conversable agents powered by LLM, tool or human, which can be used to perform tasks collectively via automated chat. This framework allows tool use and human participation through multi-agent conversation.\n", "Please find documentation about this feature [here](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat).\n", "\n", - "RetrieveChat is a conversational system for retrieval-augmented code generation and question answering. In this notebook, we demonstrate how to utilize RetrieveChat to generate code and answer questions based on customized documentations that are not present in the LLM's training dataset. 
RetrieveChat uses the `RetrieveAssistantAgent` and `RetrieveUserProxyAgent`, which is similar to the usage of `AssistantAgent` and `UserProxyAgent` in other notebooks (e.g., [Automated Task Solving with Code Generation, Execution & Debugging](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_auto_feedback_from_code_execution.ipynb)). Essentially, `RetrieveAssistantAgent` and `RetrieveUserProxyAgent` implement a different auto-reply mechanism corresponding to the RetrieveChat prompts.\n",
+    "RetrieveChat is a conversational system for retrieval-augmented code generation and question answering. In this notebook, we demonstrate how to utilize RetrieveChat to generate code and answer questions based on customized documentations that are not present in the LLM's training dataset. RetrieveChat uses the `AssistantAgent` and `RetrieveUserProxyAgent`, which is similar to the usage of `AssistantAgent` and `UserProxyAgent` in other notebooks (e.g., [Automated Task Solving with Code Generation, Execution & Debugging](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_auto_feedback_from_code_execution.ipynb)). Essentially, `RetrieveUserProxyAgent` implements a different auto-reply mechanism corresponding to the RetrieveChat prompts.\n",
    "\n",
    "## Table of Contents\n",
    "We'll demonstrate six examples of using RetrieveChat for code generation and question answering:\n",
    "\n",
@@ -66,7 +66,7 @@
    "import chromadb\n",
    "\n",
    "import autogen\n",
-    "from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent\n",
+    "from autogen import AssistantAgent\n",
    "from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent\n",
    "\n",
    "# Accepted file formats for that can be stored in\n",
@@ -92,7 +92,7 @@
    "\n",
    "## Construct agents for RetrieveChat\n",
    "\n",
-    "We start by initializing the `RetrieveAssistantAgent` and `RetrieveUserProxyAgent`. The system message needs to be set to \"You are a helpful assistant.\" for RetrieveAssistantAgent. The detailed instructions are given in the user message. Later we will use the `RetrieveUserProxyAgent.message_generator` to combine the instructions and a retrieval augmented generation task for an initial prompt to be sent to the LLM assistant."
+    "We start by initializing the `AssistantAgent` and `RetrieveUserProxyAgent`. The system message needs to be set to \"You are a helpful assistant.\" for AssistantAgent. The detailed instructions are given in the user message. Later we will use the `RetrieveUserProxyAgent.message_generator` to combine the instructions and a retrieval augmented generation task for an initial prompt to be sent to the LLM assistant."
   ]
  },
  {
@@ -129,8 +129,8 @@
    }
   ],
   "source": [
-    "# 1. create an RetrieveAssistantAgent instance named \"assistant\"\n",
-    "assistant = RetrieveAssistantAgent(\n",
+    "# 1. create an AssistantAgent instance named \"assistant\"\n",
+    "assistant = AssistantAgent(\n",
    "    name=\"assistant\",\n",
    "    system_message=\"You are a helpful assistant.\",\n",
    "    llm_config={\n",
@@ -141,15 +141,9 @@
    ")\n",
    "\n",
    "# 2. create the RetrieveUserProxyAgent instance named \"ragproxyagent\"\n",
-    "# By default, the human_input_mode is \"ALWAYS\", which means the agent will ask for human input at every step. We set it to \"NEVER\" here.\n",
-    "# `docs_path` is the path to the docs directory. It can also be the path to a single file, or the url to a single file. 
By default,\n", - "# it is set to None, which works only if the collection is already created.\n", - "# `task` indicates the kind of task we're working on. In this example, it's a `code` task.\n", - "# `chunk_token_size` is the chunk token size for the retrieve chat. By default, it is set to `max_tokens * 0.6`, here we set it to 2000.\n", - "# `custom_text_types` is a list of file types to be processed. Default is `autogen.retrieve_utils.TEXT_FORMATS`.\n", - "# This only applies to files under the directories in `docs_path`. Explicitly included files and urls will be chunked regardless of their types.\n", - "# In this example, we set it to [\"non-existent-type\"] to only process markdown files. Since no \"non-existent-type\" files are included in the `websit/docs`,\n", - "# no files there will be processed. However, the explicitly included urls will still be processed.\n", + "# Refer to https://microsoft.github.io/autogen/docs/reference/agentchat/contrib/retrieve_user_proxy_agent\n", + "# and https://microsoft.github.io/autogen/docs/reference/agentchat/contrib/vectordb/chromadb\n", + "# for more information on the RetrieveUserProxyAgent and ChromaVectorDB\n", "ragproxyagent = RetrieveUserProxyAgent(\n", " name=\"ragproxyagent\",\n", " human_input_mode=\"NEVER\",\n", @@ -159,13 +153,10 @@ " \"docs_path\": [\n", " \"https://raw.githubusercontent.com/microsoft/FLAML/main/website/docs/Examples/Integrate%20-%20Spark.md\",\n", " \"https://raw.githubusercontent.com/microsoft/FLAML/main/website/docs/Research.md\",\n", - " os.path.join(os.path.abspath(\"\"), \"..\", \"website\", \"docs\"),\n", " ],\n", - " \"custom_text_types\": [\"non-existent-type\"],\n", " \"chunk_token_size\": 2000,\n", " \"model\": config_list[0][\"model\"],\n", - " # \"client\": chromadb.PersistentClient(path=\"/tmp/chromadb\"), # deprecated, use \"vector_db\" instead\n", - " \"vector_db\": \"chroma\", # to use the deprecated `client` parameter, set to None and uncomment the line above\n", + " \"vector_db\": \"chroma\",\n", " \"overwrite\": False, # set to True if you want to overwrite an existing collection\n", " \"get_or_create\": True, # set to False if don't want to reuse an existing collection\n", " },\n", @@ -196,8 +187,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-08-02 06:30:11,303 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - \u001b[32mUse the existing collection `autogen-docs`.\u001b[0m\n", - "2024-08-02 06:30:11,485 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - Found 2 chunks.\u001b[0m\n" + "2024-08-14 06:22:06,884 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - \u001b[32mUse the existing collection `autogen-docs`.\u001b[0m\n" ] }, { @@ -211,6 +201,7 @@ "name": "stderr", "output_type": "stream", "text": [ + "2024-08-14 06:22:07,353 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - Found 2 chunks.\u001b[0m\n", "Number of requested results 20 is greater than number of elements in index 2, updating n_results = 2\n" ] }, @@ -764,23 +755,22 @@ "\n", "\n", "\n", - "--------------------------------------------------------------------------------\n", + "--------------------------------------------------------------------------------\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "\u001b[33massistant\u001b[0m (to ragproxyagent):\n", "\n", - "The author of FLAML is Chi Wang, along with several co-authors for various publications related to FLAML.\n", + "The authors of FLAML (Fast and Lightweight AutoML) as mentioned 
in the provided context are Chi Wang, Qingyun Wu, Markus Weimer, and Erkang Zhu. They are listed as the authors of the publication titled \"FLAML: A Fast and Lightweight AutoML Library\" which appeared in MLSys 2021.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "The authors of FLAML (Fast and Lightweight AutoML) as mentioned in the provided context are Chi Wang, Qingyun Wu, Markus Weimer, and Erkang Zhu. They are listed as the authors of the publication titled \"FLAML: A Fast and Lightweight AutoML Library\" which appeared in MLSys 2021.\n", "\n", "--------------------------------------------------------------------------------\n" ] - }, - { - "data": { - "text/plain": [ - "ChatResult(chat_id=None, chat_history=[{'content': 'You\\'re a retrieve augmented coding assistant. You answer user\\'s questions based on your own knowledge and the\\ncontext provided by the user.\\nIf you can\\'t answer the question with or without the current context, you should reply exactly `UPDATE CONTEXT`.\\nFor code generation, you must obey the following rules:\\nRule 1. You MUST NOT install any packages because all the packages needed are already installed.\\nRule 2. You must follow the formats below to write your code:\\n```language\\n# your code\\n```\\n\\nUser\\'s question is: Who is the author of FLAML?\\n\\nContext is: # Research\\n\\nFor technical details, please check our research publications.\\n\\n- [FLAML: A Fast and Lightweight AutoML Library](https://www.microsoft.com/en-us/research/publication/flaml-a-fast-and-lightweight-automl-library/). Chi Wang, Qingyun Wu, Markus Weimer, Erkang Zhu. MLSys 2021.\\n\\n```bibtex\\n@inproceedings{wang2021flaml,\\n title={FLAML: A Fast and Lightweight AutoML Library},\\n author={Chi Wang and Qingyun Wu and Markus Weimer and Erkang Zhu},\\n year={2021},\\n booktitle={MLSys},\\n}\\n```\\n\\n- [Frugal Optimization for Cost-related Hyperparameters](https://arxiv.org/abs/2005.01571). Qingyun Wu, Chi Wang, Silu Huang. AAAI 2021.\\n\\n```bibtex\\n@inproceedings{wu2021cfo,\\n title={Frugal Optimization for Cost-related Hyperparameters},\\n author={Qingyun Wu and Chi Wang and Silu Huang},\\n year={2021},\\n booktitle={AAAI},\\n}\\n```\\n\\n- [Economical Hyperparameter Optimization With Blended Search Strategy](https://www.microsoft.com/en-us/research/publication/economical-hyperparameter-optimization-with-blended-search-strategy/). Chi Wang, Qingyun Wu, Silu Huang, Amin Saied. ICLR 2021.\\n\\n```bibtex\\n@inproceedings{wang2021blendsearch,\\n title={Economical Hyperparameter Optimization With Blended Search Strategy},\\n author={Chi Wang and Qingyun Wu and Silu Huang and Amin Saied},\\n year={2021},\\n booktitle={ICLR},\\n}\\n```\\n\\n- [An Empirical Study on Hyperparameter Optimization for Fine-Tuning Pre-trained Language Models](https://aclanthology.org/2021.acl-long.178.pdf). Susan Xueqing Liu, Chi Wang. ACL 2021.\\n\\n```bibtex\\n@inproceedings{liuwang2021hpolm,\\n title={An Empirical Study on Hyperparameter Optimization for Fine-Tuning Pre-trained Language Models},\\n author={Susan Xueqing Liu and Chi Wang},\\n year={2021},\\n booktitle={ACL},\\n}\\n```\\n\\n- [ChaCha for Online AutoML](https://www.microsoft.com/en-us/research/publication/chacha-for-online-automl/). Qingyun Wu, Chi Wang, John Langford, Paul Mineiro and Marco Rossi. 
ICML 2021.\\n\\n```bibtex\\n@inproceedings{wu2021chacha,\\n title={ChaCha for Online AutoML},\\n author={Qingyun Wu and Chi Wang and John Langford and Paul Mineiro and Marco Rossi},\\n year={2021},\\n booktitle={ICML},\\n}\\n```\\n\\n- [Fair AutoML](https://arxiv.org/abs/2111.06495). Qingyun Wu, Chi Wang. ArXiv preprint arXiv:2111.06495 (2021).\\n\\n```bibtex\\n@inproceedings{wuwang2021fairautoml,\\n title={Fair AutoML},\\n author={Qingyun Wu and Chi Wang},\\n year={2021},\\n booktitle={ArXiv preprint arXiv:2111.06495},\\n}\\n```\\n\\n- [Mining Robust Default Configurations for Resource-constrained AutoML](https://arxiv.org/abs/2202.09927). Moe Kayali, Chi Wang. ArXiv preprint arXiv:2202.09927 (2022).\\n\\n```bibtex\\n@inproceedings{kayaliwang2022default,\\n title={Mining Robust Default Configurations for Resource-constrained AutoML},\\n author={Moe Kayali and Chi Wang},\\n year={2022},\\n booktitle={ArXiv preprint arXiv:2202.09927},\\n}\\n```\\n\\n- [Targeted Hyperparameter Optimization with Lexicographic Preferences Over Multiple Objectives](https://openreview.net/forum?id=0Ij9_q567Ma). Shaokun Zhang, Feiran Jia, Chi Wang, Qingyun Wu. ICLR 2023 (notable-top-5%).\\n\\n```bibtex\\n@inproceedings{zhang2023targeted,\\n title={Targeted Hyperparameter Optimization with Lexicographic Preferences Over Multiple Objectives},\\n author={Shaokun Zhang and Feiran Jia and Chi Wang and Qingyun Wu},\\n booktitle={International Conference on Learning Representations},\\n year={2023},\\n url={https://openreview.net/forum?id=0Ij9_q567Ma},\\n}\\n```\\n\\n- [Cost-Effective Hyperparameter Optimization for Large Language Model Generation Inference](https://arxiv.org/abs/2303.04673). Chi Wang, Susan Xueqing Liu, Ahmed H. Awadallah. ArXiv preprint arXiv:2303.04673 (2023).\\n\\n```bibtex\\n@inproceedings{wang2023EcoOptiGen,\\n title={Cost-Effective Hyperparameter Optimization for Large Language Model Generation Inference},\\n author={Chi Wang and Susan Xueqing Liu and Ahmed H. Awadallah},\\n year={2023},\\n booktitle={ArXiv preprint arXiv:2303.04673},\\n}\\n```\\n\\n- [An Empirical Study on Challenging Math Problem Solving with GPT-4](https://arxiv.org/abs/2306.01337). Yiran Wu, Feiran Jia, Shaokun Zhang, Hangyu Li, Erkang Zhu, Yue Wang, Yin Tat Lee, Richard Peng, Qingyun Wu, Chi Wang. ArXiv preprint arXiv:2306.01337 (2023).\\n\\n```bibtex\\n@inproceedings{wu2023empirical,\\n title={An Empirical Study on Challenging Math Problem Solving with GPT-4},\\n author={Yiran Wu and Feiran Jia and Shaokun Zhang and Hangyu Li and Erkang Zhu and Yue Wang and Yin Tat Lee and Richard Peng and Qingyun Wu and Chi Wang},\\n year={2023},\\n booktitle={ArXiv preprint arXiv:2306.01337},\\n}\\n```\\n# Integrate - Spark\\n\\nFLAML has integrated Spark for distributed training. There are two main aspects of integration with Spark:\\n\\n- Use Spark ML estimators for AutoML.\\n- Use Spark to run training in parallel spark jobs.\\n\\n## Spark ML Estimators\\n\\nFLAML integrates estimators based on Spark ML models. These models are trained in parallel using Spark, so we called them Spark estimators. To use these models, you first need to organize your data in the required format.\\n\\n### Data\\n\\nFor Spark estimators, AutoML only consumes Spark data. 
FLAML provides a convenient function `to_pandas_on_spark` in the `flaml.automl.spark.utils` module to convert your data into a pandas-on-spark (`pyspark.pandas`) dataframe/series, which Spark estimators require.\\n\\nThis utility function takes data in the form of a `pandas.Dataframe` or `pyspark.sql.Dataframe` and converts it into a pandas-on-spark dataframe. It also takes `pandas.Series` or `pyspark.sql.Dataframe` and converts it into a [pandas-on-spark](https://spark.apache.org/docs/latest/api/python/user_guide/pandas_on_spark/index.html) series. If you pass in a `pyspark.pandas.Dataframe`, it will not make any changes.\\n\\nThis function also accepts optional arguments `index_col` and `default_index_type`.\\n\\n- `index_col` is the column name to use as the index, default is None.\\n- `default_index_type` is the default index type, default is \"distributed-sequence\". More info about default index type could be found on Spark official [documentation](https://spark.apache.org/docs/latest/api/python/user_guide/pandas_on_spark/options.html#default-index-type)\\n\\nHere is an example code snippet for Spark Data:\\n\\n```python\\nimport pandas as pd\\nfrom flaml.automl.spark.utils import to_pandas_on_spark\\n\\n# Creating a dictionary\\ndata = {\\n \"Square_Feet\": [800, 1200, 1800, 1500, 850],\\n \"Age_Years\": [20, 15, 10, 7, 25],\\n \"Price\": [100000, 200000, 300000, 240000, 120000],\\n}\\n\\n# Creating a pandas DataFrame\\ndataframe = pd.DataFrame(data)\\nlabel = \"Price\"\\n\\n# Convert to pandas-on-spark dataframe\\npsdf = to_pandas_on_spark(dataframe)\\n```\\n\\nTo use Spark ML models you need to format your data appropriately. Specifically, use [`VectorAssembler`](https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.ml.feature.VectorAssembler.html) to merge all feature columns into a single vector column.\\n\\nHere is an example of how to use it:\\n\\n```python\\nfrom pyspark.ml.feature import VectorAssembler\\n\\ncolumns = psdf.columns\\nfeature_cols = [col for col in columns if col != label]\\nfeaturizer = VectorAssembler(inputCols=feature_cols, outputCol=\"features\")\\npsdf = featurizer.transform(psdf.to_spark(index_col=\"index\"))[\"index\", \"features\"]\\n```\\n\\nLater in conducting the experiment, use your pandas-on-spark data like non-spark data and pass them using `X_train, y_train` or `dataframe, label`.\\n\\n### Estimators\\n\\n#### Model List\\n\\n- `lgbm_spark`: The class for fine-tuning Spark version LightGBM models, using [SynapseML](https://microsoft.github.io/SynapseML/docs/features/lightgbm/about/) API.\\n\\n#### Usage\\n\\nFirst, prepare your data in the required format as described in the previous section.\\n\\nBy including the models you intend to try in the `estimators_list` argument to `flaml.automl`, FLAML will start trying configurations for these models. 
If your input is Spark data, FLAML will also use estimators with the `_spark` postfix by default, even if you haven\\'t specified them.\\n\\nHere is an example code snippet using SparkML models in AutoML:\\n\\n```python\\nimport flaml\\n\\n# prepare your data in pandas-on-spark format as we previously mentioned\\n\\nautoml = flaml.AutoML()\\nsettings = {\\n \"time_budget\": 30,\\n \"metric\": \"r2\",\\n \"estimator_list\": [\"lgbm_spark\"], # this setting is optional\\n \"task\": \"regression\",\\n}\\n\\nautoml.fit(\\n dataframe=psdf,\\n label=label,\\n **settings,\\n)\\n```\\n\\n[Link to notebook](https://github.com/microsoft/FLAML/blob/main/notebook/automl_bankrupt_synapseml.ipynb) | [Open in colab](https://colab.research.google.com/github/microsoft/FLAML/blob/main/notebook/automl_bankrupt_synapseml.ipynb)\\n\\n## Parallel Spark Jobs\\n\\nYou can activate Spark as the parallel backend during parallel tuning in both [AutoML](/docs/Use-Cases/Task-Oriented-AutoML#parallel-tuning) and [Hyperparameter Tuning](/docs/Use-Cases/Tune-User-Defined-Function#parallel-tuning), by setting the `use_spark` to `true`. FLAML will dispatch your job to the distributed Spark backend using [`joblib-spark`](https://github.com/joblib/joblib-spark).\\n\\nPlease note that you should not set `use_spark` to `true` when applying AutoML and Tuning for Spark Data. This is because only SparkML models will be used for Spark Data in AutoML and Tuning. As SparkML models run in parallel, there is no need to distribute them with `use_spark` again.\\n\\nAll the Spark-related arguments are stated below. These arguments are available in both Hyperparameter Tuning and AutoML:\\n\\n- `use_spark`: boolean, default=False | Whether to use spark to run the training in parallel spark jobs. This can be used to accelerate training on large models and large datasets, but will incur more overhead in time and thus slow down training in some cases. GPU training is not supported yet when use_spark is True. For Spark clusters, by default, we will launch one trial per executor. However, sometimes we want to launch more trials than the number of executors (e.g., local mode). In this case, we can set the environment variable `FLAML_MAX_CONCURRENT` to override the detected `num_executors`. The final number of concurrent trials will be the minimum of `n_concurrent_trials` and `num_executors`.\\n- `n_concurrent_trials`: int, default=1 | The number of concurrent trials. When n_concurrent_trials > 1, FLAML performes parallel tuning.\\n- `force_cancel`: boolean, default=False | Whether to forcely cancel Spark jobs if the search time exceeded the time budget. 
Spark jobs include parallel tuning jobs and Spark-based model training jobs.\\n\\nAn example code snippet for using parallel Spark jobs:\\n\\n```python\\nimport flaml\\n\\nautoml_experiment = flaml.AutoML()\\nautoml_settings = {\\n \"time_budget\": 30,\\n \"metric\": \"r2\",\\n \"task\": \"regression\",\\n \"n_concurrent_trials\": 2,\\n \"use_spark\": True,\\n \"force_cancel\": True, # Activating the force_cancel option can immediately halt Spark jobs once they exceed the allocated time_budget.\\n}\\n\\nautoml.fit(\\n dataframe=dataframe,\\n label=label,\\n **automl_settings,\\n)\\n```\\n\\n[Link to notebook](https://github.com/microsoft/FLAML/blob/main/notebook/integrate_spark.ipynb) | [Open in colab](https://colab.research.google.com/github/microsoft/FLAML/blob/main/notebook/integrate_spark.ipynb)\\n\\n', 'role': 'assistant'}, {'content': 'The author of FLAML is Chi Wang, along with several co-authors for various publications related to FLAML.', 'role': 'user'}], summary='The author of FLAML is Chi Wang, along with several co-authors for various publications related to FLAML.', cost=({'total_cost': 0.004711, 'gpt-35-turbo': {'cost': 0.004711, 'prompt_tokens': 3110, 'completion_tokens': 23, 'total_tokens': 3133}}, {'total_cost': 0}), human_input=[])" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ diff --git a/notebook/agentchat_RetrieveChat_mongodb.ipynb b/notebook/agentchat_RetrieveChat_mongodb.ipynb index 0f24cf16579f..09c3c44bef27 100644 --- a/notebook/agentchat_RetrieveChat_mongodb.ipynb +++ b/notebook/agentchat_RetrieveChat_mongodb.ipynb @@ -10,7 +10,7 @@ "AutoGen offers conversable agents powered by LLM, tool or human, which can be used to perform tasks collectively via automated chat. This framework allows tool use and human participation through multi-agent conversation.\n", "Please find documentation about this feature [here](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat).\n", "\n", - "RetrieveChat is a conversational system for retrieval-augmented code generation and question answering. In this notebook, we demonstrate how to utilize RetrieveChat to generate code and answer questions based on customized documentations that are not present in the LLM's training dataset. RetrieveChat uses the `RetrieveAssistantAgent` and `RetrieveUserProxyAgent`, which is similar to the usage of `AssistantAgent` and `UserProxyAgent` in other notebooks (e.g., [Automated Task Solving with Code Generation, Execution & Debugging](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_auto_feedback_from_code_execution.ipynb)). Essentially, `RetrieveAssistantAgent` and `RetrieveUserProxyAgent` implement a different auto-reply mechanism corresponding to the RetrieveChat prompts.\n", + "RetrieveChat is a conversational system for retrieval-augmented code generation and question answering. In this notebook, we demonstrate how to utilize RetrieveChat to generate code and answer questions based on customized documentations that are not present in the LLM's training dataset. RetrieveChat uses the `AssistantAgent` and `RetrieveUserProxyAgent`, which is similar to the usage of `AssistantAgent` and `UserProxyAgent` in other notebooks (e.g., [Automated Task Solving with Code Generation, Execution & Debugging](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_auto_feedback_from_code_execution.ipynb)). 
Essentially, `RetrieveUserProxyAgent` implements a different auto-reply mechanism corresponding to the RetrieveChat prompts.\n",
    "\n",
    "## Table of Contents\n",
    "We'll demonstrate six examples of using RetrieveChat for code generation and question answering:\n",
    "\n",
@@ -58,7 +58,7 @@
    "import os\n",
    "\n",
    "import autogen\n",
-    "from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent\n",
+    "from autogen import AssistantAgent\n",
    "from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent\n",
    "\n",
    "# Accepted file formats for that can be stored in\n",
@@ -83,7 +83,7 @@
    "\n",
    "## Construct agents for RetrieveChat\n",
    "\n",
-    "We start by initializing the `RetrieveAssistantAgent` and `RetrieveUserProxyAgent`. The system message needs to be set to \"You are a helpful assistant.\" for RetrieveAssistantAgent. The detailed instructions are given in the user message. Later we will use the `RetrieveUserProxyAgent.message_generator` to combine the instructions and a retrieval augmented generation task for an initial prompt to be sent to the LLM assistant."
+    "We start by initializing the `AssistantAgent` and `RetrieveUserProxyAgent`. The system message needs to be set to \"You are a helpful assistant.\" for AssistantAgent. The detailed instructions are given in the user message. Later we will use the `RetrieveUserProxyAgent.message_generator` to combine the instructions and a retrieval augmented generation task for an initial prompt to be sent to the LLM assistant."
   ]
  },
  {
@@ -111,8 +111,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "# 1. create an RetrieveAssistantAgent instance named \"assistant\"\n",
-    "assistant = RetrieveAssistantAgent(\n",
+    "# 1. create an AssistantAgent instance named \"assistant\"\n",
+    "assistant = AssistantAgent(\n",
    "    name=\"assistant\",\n",
    "    system_message=\"You are a helpful assistant.\",\n",
    "    llm_config={\n",
@@ -123,16 +123,9 @@
    ")\n",
    "\n",
    "# 2. create the RetrieveUserProxyAgent instance named \"ragproxyagent\"\n",
-    "# By default, the human_input_mode is \"ALWAYS\", which means the agent will ask for human input at every step. We set it to \"NEVER\" here.\n",
-    "# `docs_path` is the path to the docs directory. It can also be the path to a single file, or the url to a single file. By default,\n",
-    "# it is set to None, which works only if the collection is already created.\n",
-    "# `task` indicates the kind of task we're working on. In this example, it's a `code` task.\n",
-    "# `chunk_token_size` is the chunk token size for the retrieve chat. By default, it is set to `max_tokens * 0.6`, here we set it to 2000.\n",
-    "# `custom_text_types` is a list of file types to be processed. Default is `autogen.retrieve_utils.TEXT_FORMATS`.\n",
-    "# This only applies to files under the directories in `docs_path`. Explicitly included files and urls will be chunked regardless of their types.\n",
-    "# In this example, we set it to [\"non-existent-type\"] to only process markdown files. Since no \"non-existent-type\" files are included in the `websit/docs`,\n",
-    "# no files there will be processed. 
However, the explicitly included urls will still be processed.\n", - "# **NOTE** Upon the first time adding in the documents, initial query may be slower due to index creation and document indexing time\n", + "# Refer to https://microsoft.github.io/autogen/docs/reference/agentchat/contrib/retrieve_user_proxy_agent\n", + "# and https://microsoft.github.io/autogen/docs/reference/agentchat/contrib/vectordb/mongodb\n", + "# for more information on the RetrieveUserProxyAgent and MongoDBAtlasVectorDB\n", "ragproxyagent = RetrieveUserProxyAgent(\n", " name=\"ragproxyagent\",\n", " human_input_mode=\"NEVER\",\n", @@ -142,9 +135,7 @@ " \"docs_path\": [\n", " \"https://raw.githubusercontent.com/microsoft/FLAML/main/website/docs/Examples/Integrate%20-%20Spark.md\",\n", " \"https://raw.githubusercontent.com/microsoft/FLAML/main/website/docs/Research.md\",\n", - " os.path.join(os.path.abspath(\"\"), \"..\", \"website\", \"docs\"),\n", " ],\n", - " \"custom_text_types\": [\"non-existent-type\"],\n", " \"chunk_token_size\": 2000,\n", " \"model\": config_list[0][\"model\"],\n", " \"vector_db\": \"mongodb\", # MongoDB Atlas database\n", diff --git a/notebook/agentchat_RetrieveChat_pgvector.ipynb b/notebook/agentchat_RetrieveChat_pgvector.ipynb index 1a8d70e29654..4d9dd44c33d8 100644 --- a/notebook/agentchat_RetrieveChat_pgvector.ipynb +++ b/notebook/agentchat_RetrieveChat_pgvector.ipynb @@ -10,7 +10,7 @@ "AutoGen offers conversable agents powered by LLM, tool or human, which can be used to perform tasks collectively via automated chat. This framework allows tool use and human participation through multi-agent conversation.\n", "Please find documentation about this feature [here](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat).\n", "\n", - "RetrieveChat is a conversational system for retrieval-augmented code generation and question answering. In this notebook, we demonstrate how to utilize RetrieveChat to generate code and answer questions based on customized documentations that are not present in the LLM's training dataset. RetrieveChat uses the `RetrieveAssistantAgent` and `RetrieveUserProxyAgent`, which is similar to the usage of `AssistantAgent` and `UserProxyAgent` in other notebooks (e.g., [Automated Task Solving with Code Generation, Execution & Debugging](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_auto_feedback_from_code_execution.ipynb)). Essentially, `RetrieveAssistantAgent` and `RetrieveUserProxyAgent` implement a different auto-reply mechanism corresponding to the RetrieveChat prompts.\n", + "RetrieveChat is a conversational system for retrieval-augmented code generation and question answering. In this notebook, we demonstrate how to utilize RetrieveChat to generate code and answer questions based on customized documentations that are not present in the LLM's training dataset. RetrieveChat uses the `AssistantAgent` and `RetrieveUserProxyAgent`, which is similar to the usage of `AssistantAgent` and `UserProxyAgent` in other notebooks (e.g., [Automated Task Solving with Code Generation, Execution & Debugging](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_auto_feedback_from_code_execution.ipynb)). 
Essentially, `RetrieveUserProxyAgent` implements a different auto-reply mechanism corresponding to the RetrieveChat prompts.\n",
    "\n",
    "## Table of Contents\n",
    "We'll demonstrate six examples of using RetrieveChat for code generation and question answering:\n",
    "\n",
@@ -92,29 +92,13 @@
    "from sentence_transformers import SentenceTransformer\n",
    "\n",
    "import autogen\n",
-    "from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent\n",
+    "from autogen import AssistantAgent\n",
    "from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent\n",
    "\n",
    "# Accepted file formats for that can be stored in\n",
    "# a vector database instance\n",
    "from autogen.retrieve_utils import TEXT_FORMATS\n",
    "\n",
-    "config_list = [\n",
-    "    {\n",
-    "        \"model\": \"Meta-Llama-3-8B-Instruct-imatrix\",\n",
-    "        \"api_key\": \"YOUR_API_KEY\",\n",
-    "        \"base_url\": \"http://localhost:8080/v1\",\n",
-    "        \"api_type\": \"openai\",\n",
-    "    },\n",
-    "    {\"model\": \"gpt-3.5-turbo-0125\", \"api_key\": \"YOUR_API_KEY\", \"api_type\": \"openai\"},\n",
-    "    {\n",
-    "        \"model\": \"gpt-35-turbo\",\n",
-    "        \"base_url\": \"...\",\n",
-    "        \"api_type\": \"azure\",\n",
-    "        \"api_version\": \"2023-07-01-preview\",\n",
-    "        \"api_key\": \"...\",\n",
-    "    },\n",
-    "]\n",
    "config_list = autogen.config_list_from_json(\n",
    "    \"OAI_CONFIG_LIST\",\n",
    "    file_location=\".\",\n",
@@ -136,7 +120,7 @@
    "\n",
    "## Construct agents for RetrieveChat\n",
    "\n",
-    "We start by initializing the `RetrieveAssistantAgent` and `RetrieveUserProxyAgent`. The system message needs to be set to \"You are a helpful assistant.\" for RetrieveAssistantAgent. The detailed instructions are given in the user message. Later we will use the `RetrieveUserProxyAgent.message_generator` to combine the instructions and a retrieval augmented generation task for an initial prompt to be sent to the LLM assistant."
+    "We start by initializing the `AssistantAgent` and `RetrieveUserProxyAgent`. The system message needs to be set to \"You are a helpful assistant.\" for AssistantAgent. The detailed instructions are given in the user message. Later we will use the `RetrieveUserProxyAgent.message_generator` to combine the instructions and a retrieval augmented generation task for an initial prompt to be sent to the LLM assistant."
   ]
  },
  {
@@ -173,8 +157,8 @@
    }
   ],
   "source": [
-    "# 1. create an RetrieveAssistantAgent instance named \"assistant\"\n",
-    "assistant = RetrieveAssistantAgent(\n",
+    "# 1. create an AssistantAgent instance named \"assistant\"\n",
+    "assistant = AssistantAgent(\n",
    "    name=\"assistant\",\n",
    "    system_message=\"You are a helpful assistant. You must always reply with some form of text.\",\n",
    "    llm_config={\n",
@@ -191,15 +175,9 @@
    "sentence_transformer_ef = SentenceTransformer(\"all-distilroberta-v1\").encode\n",
    "\n",
    "# 2. create the RetrieveUserProxyAgent instance named \"ragproxyagent\"\n",
-    "# By default, the human_input_mode is \"ALWAYS\", which means the agent will ask for human input at every step. We set it to \"NEVER\" here.\n",
-    "# `docs_path` is the path to the docs directory. It can also be the path to a single file, or the url to a single file. By default,\n",
-    "# it is set to None, which works only if the collection is already created.\n",
-    "# `task` indicates the kind of task we're working on. In this example, it's a `code` task.\n",
-    "# `chunk_token_size` is the chunk token size for the retrieve chat. 
By default, it is set to `max_tokens * 0.6`, here we set it to 2000.\n", - "# `custom_text_types` is a list of file types to be processed. Default is `autogen.retrieve_utils.TEXT_FORMATS`.\n", - "# This only applies to files under the directories in `docs_path`. Explicitly included files and urls will be chunked regardless of their types.\n", - "# In this example, we set it to [\"non-existent-type\"] to only process markdown files. Since no \"non-existent-type\" files are included in the `websit/docs`,\n", - "# no files there will be processed. However, the explicitly included urls will still be processed.\n", + "# Refer to https://microsoft.github.io/autogen/docs/reference/agentchat/contrib/retrieve_user_proxy_agent\n", + "# and https://microsoft.github.io/autogen/docs/reference/agentchat/contrib/vectordb/pgvectordb\n", + "# for more information on the RetrieveUserProxyAgent and PGVectorDB\n", "ragproxyagent = RetrieveUserProxyAgent(\n", " name=\"ragproxyagent\",\n", " human_input_mode=\"NEVER\",\n", @@ -209,9 +187,7 @@ " \"docs_path\": [\n", " \"https://raw.githubusercontent.com/microsoft/FLAML/main/website/docs/Examples/Integrate%20-%20Spark.md\",\n", " \"https://raw.githubusercontent.com/microsoft/FLAML/main/website/docs/Research.md\",\n", - " os.path.join(os.path.abspath(\"\"), \"..\", \"website\", \"docs\"),\n", " ],\n", - " \"custom_text_types\": [\"non-existent-type\"],\n", " \"chunk_token_size\": 2000,\n", " \"model\": config_list[0][\"model\"],\n", " \"vector_db\": \"pgvector\", # PGVector database\n", diff --git a/notebook/agentchat_RetrieveChat_qdrant.ipynb b/notebook/agentchat_RetrieveChat_qdrant.ipynb index b5bc2f681d22..0035a8e30817 100644 --- a/notebook/agentchat_RetrieveChat_qdrant.ipynb +++ b/notebook/agentchat_RetrieveChat_qdrant.ipynb @@ -9,10 +9,10 @@ "\n", "[Qdrant](https://qdrant.tech/) is a high-performance vector search engine/database.\n", "\n", - "This notebook demonstrates the usage of `QdrantRetrieveUserProxyAgent` for RAG, based on [agentchat_RetrieveChat.ipynb](https://colab.research.google.com/github/microsoft/autogen/blob/main/notebook/agentchat_RetrieveChat.ipynb).\n", + "This notebook demonstrates the usage of Qdrant for RAG, based on [agentchat_RetrieveChat.ipynb](https://colab.research.google.com/github/microsoft/autogen/blob/main/notebook/agentchat_RetrieveChat.ipynb).\n", "\n", "\n", - "RetrieveChat is a conversational system for retrieve augmented code generation and question answering. In this notebook, we demonstrate how to utilize RetrieveChat to generate code and answer questions based on customized documentations that are not present in the LLM's training dataset. RetrieveChat uses the `RetrieveAssistantAgent` and `QdrantRetrieveUserProxyAgent`, which is similar to the usage of `AssistantAgent` and `UserProxyAgent` in other notebooks (e.g., [Automated Task Solving with Code Generation, Execution & Debugging](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_auto_feedback_from_code_execution.ipynb)).\n", + "RetrieveChat is a conversational system for retrieve augmented code generation and question answering. In this notebook, we demonstrate how to utilize RetrieveChat to generate code and answer questions based on customized documentations that are not present in the LLM's training dataset. 
RetrieveChat uses the `AssistantAgent` and `RetrieveUserProxyAgent`, which is similar to the usage of `AssistantAgent` and `UserProxyAgent` in other notebooks (e.g., [Automated Task Solving with Code Generation, Execution & Debugging](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_auto_feedback_from_code_execution.ipynb)).\n", "\n", "We'll demonstrate usage of RetrieveChat with Qdrant for code generation and question answering w/ human feedback.\n", "\n", @@ -74,7 +74,7 @@ "from sentence_transformers import SentenceTransformer\n", "\n", "import autogen\n", - "from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent\n", + "from autogen import AssistantAgent\n", "from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent\n", "\n", "# Accepted file formats for that can be stored in\n", @@ -125,7 +125,7 @@ "source": [ "## Construct agents for RetrieveChat\n", "\n", - "We start by initializing the `RetrieveAssistantAgent` and `QdrantRetrieveUserProxyAgent`. The system message needs to be set to \"You are a helpful assistant.\" for RetrieveAssistantAgent. The detailed instructions are given in the user message. Later we will use the `QdrantRetrieveUserProxyAgent.generate_init_prompt` to combine the instructions and a retrieval augmented generation task for an initial prompt to be sent to the LLM assistant.\n", + "We start by initializing the `AssistantAgent` and `RetrieveUserProxyAgent`. The system message needs to be set to \"You are a helpful assistant.\" for AssistantAgent. The detailed instructions are given in the user message. Later we will use the `RetrieveUserProxyAgent.generate_init_prompt` to combine the instructions and a retrieval augmented generation task for an initial prompt to be sent to the LLM assistant.\n", "\n", "### You can find the list of all the embedding models supported by Qdrant [here](https://qdrant.github.io/fastembed/examples/Supported_Models/)." ] @@ -151,8 +151,8 @@ } ], "source": [ - "# 1. create an RetrieveAssistantAgent instance named \"assistant\"\n", - "assistant = RetrieveAssistantAgent(\n", + "# 1. create an AssistantAgent instance named \"assistant\"\n", + "assistant = AssistantAgent(\n", " name=\"assistant\",\n", " system_message=\"You are a helpful assistant.\",\n", " llm_config={\n", @@ -167,18 +167,9 @@ "client = QdrantClient(\":memory:\")\n", "\n", "# 2. create the RetrieveUserProxyAgent instance named \"ragproxyagent\"\n", - "# By default, the human_input_mode is \"ALWAYS\", which means the agent will ask for human input at every step. We set it to \"NEVER\" here.\n", - "# `docs_path` is the path to the docs directory. It can also be the path to a single file, or the url to a single file. By default,\n", - "# it is set to None, which works only if the collection is already created.\n", - "#\n", - "# Here we generated the documentations from FLAML's docstrings. Not needed if you just want to try this notebook but not to reproduce the\n", - "# outputs. Clone the FLAML (https://github.com/microsoft/FLAML) repo and navigate to its website folder. Pip install and run `pydoc-markdown`\n", - "# and it will generate folder `reference` under `website/docs`.\n", - "#\n", - "# `task` indicates the kind of task we're working on. In this example, it's a `code` task.\n", - "# `chunk_token_size` is the chunk token size for the retrieve chat. By default, it is set to `max_tokens * 0.6`, here we set it to 2000.\n", - "# We use an in-memory QdrantClient instance here. 
Not recommended for production.\n", - "# Get the installation instructions here: https://qdrant.tech/documentation/guides/installation/\n", + "# Refer to https://microsoft.github.io/autogen/docs/reference/agentchat/contrib/retrieve_user_proxy_agent\n", + "# and https://microsoft.github.io/autogen/docs/reference/agentchat/contrib/vectordb/qdrant\n", + "# for more information on the RetrieveUserProxyAgent and QdrantVectorDB\n", "ragproxyagent = RetrieveUserProxyAgent(\n", " name=\"ragproxyagent\",\n", " human_input_mode=\"NEVER\",\n", diff --git a/notebook/agentchat_groupchat_RAG.ipynb b/notebook/agentchat_groupchat_RAG.ipynb index 1057deabf924..e18bd99c1511 100644 --- a/notebook/agentchat_groupchat_RAG.ipynb +++ b/notebook/agentchat_groupchat_RAG.ipynb @@ -35,14 +35,14 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "LLM models: ['gpt4-1106-preview', 'gpt-35-turbo', 'gpt-35-turbo-0613']\n" + "LLM models: ['gpt-35-turbo', 'gpt4-1106-preview', 'gpt-4o']\n" ] } ], @@ -75,18 +75,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 16, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/lijiang1/anaconda3/envs/autogen/lib/python3.10/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n", - " torch.utils._pytree._register_pytree_node(\n" - ] - } - ], + "outputs": [], "source": [ "def termination_msg(x):\n", " return isinstance(x, dict) and \"TERMINATE\" == str(x.get(\"content\", \"\"))[-9:].upper()\n", @@ -205,15 +196,9 @@ " n_results: Annotated[int, \"number of results\"] = 3,\n", " ) -> str:\n", " boss_aid.n_results = n_results # Set the number of results to be retrieved.\n", - " # Check if we need to update the context.\n", - " update_context_case1, update_context_case2 = boss_aid._check_update_context(message)\n", - " if (update_context_case1 or update_context_case2) and boss_aid.update_context:\n", - " boss_aid.problem = message if not hasattr(boss_aid, \"problem\") else boss_aid.problem\n", - " _, ret_msg = boss_aid._generate_retrieve_user_reply(message)\n", - " else:\n", - " _context = {\"problem\": message, \"n_results\": n_results}\n", - " ret_msg = boss_aid.message_generator(boss_aid, None, _context)\n", - " return ret_msg if ret_msg else message\n", + " _context = {\"problem\": message, \"n_results\": n_results}\n", + " ret_msg = boss_aid.message_generator(boss_aid, None, _context)\n", + " return ret_msg or message\n", "\n", " boss_aid.human_input_mode = \"NEVER\" # Disable human input for boss_aid since it only retrieves content.\n", "\n", @@ -255,7 +240,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -266,137 +251,130 @@ "\n", "How to use spark for parallel training in FLAML? Give me sample code.\n", "\n", - "--------------------------------------------------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[33mSenior_Python_Engineer\u001b[0m (to chat_manager):\n", - "\n", - "To use Spark for parallel training in FLAML (Fast and Lightweight AutoML), you would need to set up a Spark cluster and utilize the `spark` backend for joblib, which FLAML uses internally for parallel training. 
Here’s an example of how you might set up and use Spark with FLAML for AutoML tasks:\n", - "\n", - "Firstly, ensure that you have the Spark cluster set up and the `pyspark` and `joblib-spark` packages installed in your environment. You can install the required packages using pip if they are not already installed:\n", + "--------------------------------------------------------------------------------\n", + "How to use spark for parallel training in FLAML? Give me sample code.\n", "\n", - "```python\n", - "!pip install flaml pyspark joblib-spark\n", - "```\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Senior_Python_Engineer\n", + "\u001b[0m\n", + "\u001b[33mSenior_Python_Engineer\u001b[0m (to chat_manager):\n", "\n", - "Here's a sample code snippet that demonstrates how to use FLAML with Spark for parallel training:\n", + "To use Spark for parallel training in FLAML, you need to install `pyspark` package and set up a Spark cluster. Here's some sample code for using Spark in FLAML:\n", "\n", "```python\n", "from flaml import AutoML\n", "from pyspark.sql import SparkSession\n", - "from sklearn.datasets import load_digits\n", - "from joblibspark import register_spark\n", "\n", - "# Initialize a Spark session\n", - "spark = SparkSession.builder \\\n", - " .master(\"local[*]\") \\\n", - " .appName(\"FLAML_Spark_Example\") \\\n", - " .getOrCreate()\n", + "# create a SparkSession\n", + "spark = SparkSession.builder.appName(\"FLAML-Spark\").getOrCreate()\n", "\n", - "# Register the joblib spark backend\n", - "register_spark() # This registers the backend for parallel processing\n", - "\n", - "# Load sample data\n", - "X, y = load_digits(return_X_y=True)\n", - "\n", - "# Initialize an AutoML instance\n", + "# create a FLAML AutoML object with Spark backend\n", "automl = AutoML()\n", "\n", - "# Define the settings for the AutoML run\n", + "# load data from Spark DataFrame\n", + "data = spark.read.format(\"csv\").option(\"header\", \"true\").load(\"data.csv\")\n", + "\n", + "# specify the target column and task type\n", "settings = {\n", - " \"time_budget\": 60, # Total running time in seconds\n", - " \"metric\": 'accuracy', # Primary metric for evaluation\n", - " \"task\": 'classification', # Task type\n", - " \"n_jobs\": -1, # Number of jobs to run in parallel (use -1 for all)\n", - " \"estimator_list\": ['lgbm', 'rf', 'xgboost'], # List of estimators to consider\n", - " \"log_file_name\": \"flaml_log.txt\", # Log file name\n", + " \"time_budget\": 60, # time budget in seconds\n", + " \"metric\": 'accuracy',\n", + " \"task\": 'classification',\n", "}\n", "\n", - "# Run the AutoML search with Spark backend\n", - "automl.fit(X_train=X, y_train=y, **settings)\n", + "# train and validate models in parallel using Spark\n", + "best_model = automl.fit(data, **settings)\n", "\n", - "# Output the best model and its performance\n", - "print(f\"Best ML model: {automl.model}\")\n", - "print(f\"Best ML model's accuracy: {automl.best_loss}\")\n", + "# print the best model and its metadata\n", + "print(automl.model_name)\n", + "print(automl.best_model)\n", + "print(automl.best_config)\n", "\n", - "# Stop the Spark session\n", + "# stop the SparkSession\n", "spark.stop()\n", + "\n", + "# terminate the code execution\n", + "TERMINATE\n", "```\n", "\n", - "The `register_spark()` function from `joblib-spark` is used to register the Spark backend with joblib, which is utilized for parallel training within FLAML. 
The `n_jobs=-1` parameter tells FLAML to use all available Spark executors for parallel training.\n", + "Note that this is just a sample code, you may need to modify it to fit your specific use case.\n", "\n", - "Please note that the actual process of setting up a Spark cluster can be complex and might involve additional steps such as configuring Spark workers, allocating resources, and more, which are beyond the scope of this code snippet.\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Code_Reviewer\n", + "\u001b[0m\n", + "\u001b[33mCode_Reviewer\u001b[0m (to chat_manager):\n", "\n", - "If you encounter any issues or need to adjust configurations for your specific Spark setup, please refer to the Spark and FLAML documentation for more details.\n", "\n", - "When you run the code, ensure that your Spark cluster is properly configured and accessible from your Python environment. Adjust the `.master(\"local[*]\")` to point to your Spark master's URL if you are running a cluster that is not local.\n", "\n", "--------------------------------------------------------------------------------\n", - "To use Spark for parallel training in FLAML (Fast and Lightweight AutoML), you would need to set up a Spark cluster and utilize the `spark` backend for joblib, which FLAML uses internally for parallel training. Here’s an example of how you might set up and use Spark with FLAML for AutoML tasks:\n", + "\u001b[32m\n", + "Next speaker: Product_Manager\n", + "\u001b[0m\n", + "\u001b[33mProduct_Manager\u001b[0m (to chat_manager):\n", "\n", - "Firstly, ensure that you have the Spark cluster set up and the `pyspark` and `joblib-spark` packages installed in your environment. You can install the required packages using pip if they are not already installed:\n", + "Do you have any questions related to the code sample?\n", "\n", - "```python\n", - "!pip install flaml pyspark joblib-spark\n", - "```\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Senior_Python_Engineer\n", + "\u001b[0m\n", + "\u001b[33mSenior_Python_Engineer\u001b[0m (to chat_manager):\n", "\n", - "Here's a sample code snippet that demonstrates how to use FLAML with Spark for parallel training:\n", + "No, I don't have any questions related to the code sample.\n", "\n", - "```python\n", - "from flaml import AutoML\n", - "from pyspark.sql import SparkSession\n", - "from sklearn.datasets import load_digits\n", - "from joblibspark import register_spark\n", - "\n", - "# Initialize a Spark session\n", - "spark = SparkSession.builder \\\n", - " .master(\"local[*]\") \\\n", - " .appName(\"FLAML_Spark_Example\") \\\n", - " .getOrCreate()\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Product_Manager\n", + "\u001b[0m\n", + "\u001b[33mProduct_Manager\u001b[0m (to chat_manager):\n", "\n", - "# Register the joblib spark backend\n", - "register_spark() # This registers the backend for parallel processing\n", + "Great, let me know if you need any further assistance.\n", "\n", - "# Load sample data\n", - "X, y = load_digits(return_X_y=True)\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Senior_Python_Engineer\n", + "\u001b[0m\n", + "\u001b[33mSenior_Python_Engineer\u001b[0m (to chat_manager):\n", "\n", - "# Initialize an AutoML instance\n", - "automl 
= AutoML()\n", + "Sure, will do. Thank you!\n", "\n", - "# Define the settings for the AutoML run\n", - "settings = {\n", - " \"time_budget\": 60, # Total running time in seconds\n", - " \"metric\": 'accuracy', # Primary metric for evaluation\n", - " \"task\": 'classification', # Task type\n", - " \"n_jobs\": -1, # Number of jobs to run in parallel (use -1 for all)\n", - " \"estimator_list\": ['lgbm', 'rf', 'xgboost'], # List of estimators to consider\n", - " \"log_file_name\": \"flaml_log.txt\", # Log file name\n", - "}\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Product_Manager\n", + "\u001b[0m\n", + "\u001b[33mProduct_Manager\u001b[0m (to chat_manager):\n", "\n", - "# Run the AutoML search with Spark backend\n", - "automl.fit(X_train=X, y_train=y, **settings)\n", + "You're welcome! Have a great day ahead!\n", "\n", - "# Output the best model and its performance\n", - "print(f\"Best ML model: {automl.model}\")\n", - "print(f\"Best ML model's accuracy: {automl.best_loss}\")\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Senior_Python_Engineer\n", + "\u001b[0m\n", + "\u001b[33mSenior_Python_Engineer\u001b[0m (to chat_manager):\n", "\n", - "# Stop the Spark session\n", - "spark.stop()\n", - "```\n", + "You too, have a great day ahead!\n", "\n", - "The `register_spark()` function from `joblib-spark` is used to register the Spark backend with joblib, which is utilized for parallel training within FLAML. The `n_jobs=-1` parameter tells FLAML to use all available Spark executors for parallel training.\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Product_Manager\n", + "\u001b[0m\n", + "\u001b[33mProduct_Manager\u001b[0m (to chat_manager):\n", "\n", - "Please note that the actual process of setting up a Spark cluster can be complex and might involve additional steps such as configuring Spark workers, allocating resources, and more, which are beyond the scope of this code snippet.\n", + "Thank you! Goodbye!\n", "\n", - "If you encounter any issues or need to adjust configurations for your specific Spark setup, please refer to the Spark and FLAML documentation for more details.\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Senior_Python_Engineer\n", + "\u001b[0m\n", + "\u001b[33mSenior_Python_Engineer\u001b[0m (to chat_manager):\n", "\n", - "When you run the code, ensure that your Spark cluster is properly configured and accessible from your Python environment. 
Adjust the `.master(\"local[*]\")` to point to your Spark master's URL if you are running a cluster that is not local.\n", + "Goodbye!\n", "\n", "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Code_Reviewer\n", + "\u001b[0m\n", "\u001b[33mCode_Reviewer\u001b[0m (to chat_manager):\n", "\n", "TERMINATE\n", @@ -420,38 +398,38 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "2024-04-07 18:26:04,562 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - \u001b[32mUse the existing collection `groupchat`.\u001b[0m\n" + "Trying to create collection.\n" ] }, { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "Trying to create collection.\n" + "2024-08-14 06:59:09,583 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - \u001b[32mUse the existing collection `groupchat`.\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "2024-04-07 18:26:05,485 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - Found 1 chunks.\u001b[0m\n", - "Number of requested results 3 is greater than number of elements in index 1, updating n_results = 1\n", - "Model gpt4-1106-preview not found. Using cl100k_base encoding.\n" + "2024-08-14 06:59:09,902 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - Found 2 chunks.\u001b[0m\n", + "2024-08-14 06:59:09,912 - autogen.agentchat.contrib.vectordb.chromadb - INFO - No content embedding is provided. Will use the VectorDB's embedding function to generate the content embedding.\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "VectorDB returns doc_ids: [['bdfbc921']]\n", + "VectorDB returns doc_ids: [['bdfbc921', 'b2c1ec51', '0e57e70f']]\n", "\u001b[32mAdding content of doc bdfbc921 to context.\u001b[0m\n", + "\u001b[32mAdding content of doc b2c1ec51 to context.\u001b[0m\n", "\u001b[33mBoss_Assistant\u001b[0m (to chat_manager):\n", "\n", "You're a retrieve augmented coding assistant. You answer user's questions based on your own knowledge and the\n", @@ -595,10 +573,90 @@ "```\n", "\n", "[Link to notebook](https://github.com/microsoft/FLAML/blob/main/notebook/integrate_spark.ipynb) | [Open in colab](https://colab.research.google.com/github/microsoft/FLAML/blob/main/notebook/integrate_spark.ipynb)\n", + "# Integrate - Spark\n", + "\n", + "FLAML has integrated Spark for distributed training. There are two main aspects of integration with Spark:\n", + "\n", + "- Use Spark ML estimators for AutoML.\n", + "- Use Spark to run training in parallel spark jobs.\n", + "\n", + "## Spark ML Estimators\n", + "\n", + "FLAML integrates estimators based on Spark ML models. These models are trained in parallel using Spark, so we called them Spark estimators. To use these models, you first need to organize your data in the required format.\n", + "\n", + "### Data\n", + "\n", + "For Spark estimators, AutoML only consumes Spark data. FLAML provides a convenient function `to_pandas_on_spark` in the `flaml.automl.spark.utils` module to convert your data into a pandas-on-spark (`pyspark.pandas`) dataframe/series, which Spark estimators require.\n", + "\n", + "This utility function takes data in the form of a `pandas.Dataframe` or `pyspark.sql.Dataframe` and converts it into a pandas-on-spark dataframe. 
It also takes `pandas.Series` or `pyspark.sql.Dataframe` and converts it into a [pandas-on-spark](https://spark.apache.org/docs/latest/api/python/user_guide/pandas_on_spark/index.html) series. If you pass in a `pyspark.pandas.Dataframe`, it will not make any changes.\n", + "\n", + "This function also accepts optional arguments `index_col` and `default_index_type`.\n", + "\n", + "- `index_col` is the column name to use as the index, default is None.\n", + "- `default_index_type` is the default index type, default is \"distributed-sequence\". More info about default index type could be found on Spark official [documentation](https://spark.apache.org/docs/latest/api/python/user_guide/pandas_on_spark/options.html#default-index-type)\n", + "\n", + "Here is an example code snippet for Spark Data:\n", + "\n", + "```python\n", + "import pandas as pd\n", + "from flaml.automl.spark.utils import to_pandas_on_spark\n", + "\n", + "# Creating a dictionary\n", + "data = {\n", + " \"Square_Feet\": [800, 1200, 1800, 1500, 850],\n", + " \"Age_Years\": [20, 15, 10, 7, 25],\n", + " \"Price\": [100000, 200000, 300000, 240000, 120000],\n", + "}\n", + "\n", + "# Creating a pandas DataFrame\n", + "dataframe = pd.DataFrame(data)\n", + "label = \"Price\"\n", + "\n", + "# Convert to pandas-on-spark dataframe\n", + "psdf = to_pandas_on_spark(dataframe)\n", + "```\n", + "\n", + "To use Spark ML models you need to format your data appropriately. Specifically, use [`VectorAssembler`](https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.ml.feature.VectorAssembler.html) to merge all feature columns into a single vector column.\n", + "\n", + "Here is an example of how to use it:\n", + "\n", + "```python\n", + "from pyspark.ml.feature import VectorAssembler\n", + "\n", + "columns = psdf.columns\n", + "feature_cols = [col for col in columns if col != label]\n", + "featurizer = VectorAssembler(inputCols=feature_cols, outputCol=\"features\")\n", + "psdf = featurizer.transform(psdf.to_spark(index_col=\"index\"))[\"index\", \"features\"]\n", + "```\n", + "\n", + "Later in conducting the experiment, use your pandas-on-spark data like non-spark data and pass them using `X_train, y_train` or `dataframe, label`.\n", + "\n", + "### Estimators\n", + "\n", + "#### Model List\n", + "\n", + "- `lgbm_spark`: The class for fine-tuning Spark version LightGBM models, using [SynapseML](https://microsoft.github.io/SynapseML/docs/features/lightgbm/about/) API.\n", + "\n", + "#### Usage\n", + "\n", + "First, prepare your data in the required format as described in the previous section.\n", + "\n", + "By including the models you intend to try in the `estimators_list` argument to `flaml.automl`, FLAML will start trying configurations for these models. If your input is Spark data, FLAML will also use estimators with the `_spark` postfix by default, even if you haven't specified them.\n", + "\n", + "Here is an example code snippet using SparkML models in AutoML:\n", + "\n", + "```python\n", + "import flaml\n", + "\n", + "# prepare your data in pandas-on-spark format as we previously mentioned\n", "\n", "\n", "\n", "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Product_Manager\n", + "\u001b[0m\n", + "\u001b[32mAdding content of doc b2c1ec51 to context.\u001b[0m\n", "\u001b[33mBoss_Assistant\u001b[0m (to chat_manager):\n", "\n", "You're a retrieve augmented coding assistant. 
You answer user's questions based on your own knowledge and the\n", @@ -742,58 +800,188 @@ "```\n", "\n", "[Link to notebook](https://github.com/microsoft/FLAML/blob/main/notebook/integrate_spark.ipynb) | [Open in colab](https://colab.research.google.com/github/microsoft/FLAML/blob/main/notebook/integrate_spark.ipynb)\n", + "# Integrate - Spark\n", "\n", + "FLAML has integrated Spark for distributed training. There are two main aspects of integration with Spark:\n", "\n", + "- Use Spark ML estimators for AutoML.\n", + "- Use Spark to run training in parallel spark jobs.\n", "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33mProduct_Manager\u001b[0m (to chat_manager):\n", + "## Spark ML Estimators\n", + "\n", + "FLAML integrates estimators based on Spark ML models. These models are trained in parallel using Spark, so we called them Spark estimators. To use these models, you first need to organize your data in the required format.\n", + "\n", + "### Data\n", + "\n", + "For Spark estimators, AutoML only consumes Spark data. FLAML provides a convenient function `to_pandas_on_spark` in the `flaml.automl.spark.utils` module to convert your data into a pandas-on-spark (`pyspark.pandas`) dataframe/series, which Spark estimators require.\n", + "\n", + "This utility function takes data in the form of a `pandas.Dataframe` or `pyspark.sql.Dataframe` and converts it into a pandas-on-spark dataframe. It also takes `pandas.Series` or `pyspark.sql.Dataframe` and converts it into a [pandas-on-spark](https://spark.apache.org/docs/latest/api/python/user_guide/pandas_on_spark/index.html) series. If you pass in a `pyspark.pandas.Dataframe`, it will not make any changes.\n", + "\n", + "This function also accepts optional arguments `index_col` and `default_index_type`.\n", + "\n", + "- `index_col` is the column name to use as the index, default is None.\n", + "- `default_index_type` is the default index type, default is \"distributed-sequence\". More info about default index type could be found on Spark official [documentation](https://spark.apache.org/docs/latest/api/python/user_guide/pandas_on_spark/options.html#default-index-type)\n", + "\n", + "Here is an example code snippet for Spark Data:\n", "\n", "```python\n", - "from flaml.automl import AutoML\n", - "from flaml.automl.spark.utils import to_pandas_on_spark\n", - "from pyspark.ml.feature import VectorAssembler\n", "import pandas as pd\n", + "from flaml.automl.spark.utils import to_pandas_on_spark\n", "\n", - "# Sample data in a dictionary\n", + "# Creating a dictionary\n", "data = {\n", " \"Square_Feet\": [800, 1200, 1800, 1500, 850],\n", " \"Age_Years\": [20, 15, 10, 7, 25],\n", " \"Price\": [100000, 200000, 300000, 240000, 120000],\n", "}\n", "\n", - "# Convert dictionary to pandas DataFrame\n", + "# Creating a pandas DataFrame\n", "dataframe = pd.DataFrame(data)\n", "label = \"Price\"\n", "\n", - "# Convert pandas DataFrame to pandas-on-spark DataFrame\n", + "# Convert to pandas-on-spark dataframe\n", "psdf = to_pandas_on_spark(dataframe)\n", + "```\n", + "\n", + "To use Spark ML models you need to format your data appropriately. 
Specifically, use [`VectorAssembler`](https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.ml.feature.VectorAssembler.html) to merge all feature columns into a single vector column.\n", + "\n", + "Here is an example of how to use it:\n", + "\n", + "```python\n", + "from pyspark.ml.feature import VectorAssembler\n", "\n", - "# Use VectorAssembler to merge feature columns into a single vector column\n", - "feature_cols = [col for col in psdf.columns if col != label]\n", + "columns = psdf.columns\n", + "feature_cols = [col for col in columns if col != label]\n", "featurizer = VectorAssembler(inputCols=feature_cols, outputCol=\"features\")\n", - "psdf = featurizer.transform(psdf.to_spark(index_col=\"index\"))[\"index\", \"features\", label]\n", + "psdf = featurizer.transform(psdf.to_spark(index_col=\"index\"))[\"index\", \"features\"]\n", + "```\n", + "\n", + "Later in conducting the experiment, use your pandas-on-spark data like non-spark data and pass them using `X_train, y_train` or `dataframe, label`.\n", + "\n", + "### Estimators\n", + "\n", + "#### Model List\n", + "\n", + "- `lgbm_spark`: The class for fine-tuning Spark version LightGBM models, using [SynapseML](https://microsoft.github.io/SynapseML/docs/features/lightgbm/about/) API.\n", + "\n", + "#### Usage\n", + "\n", + "First, prepare your data in the required format as described in the previous section.\n", + "\n", + "By including the models you intend to try in the `estimators_list` argument to `flaml.automl`, FLAML will start trying configurations for these models. If your input is Spark data, FLAML will also use estimators with the `_spark` postfix by default, even if you haven't specified them.\n", + "\n", + "Here is an example code snippet using SparkML models in AutoML:\n", + "\n", + "```python\n", + "import flaml\n", + "\n", + "# prepare your data in pandas-on-spark format as we previously mentioned\n", + "\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Product_Manager\n", + "\u001b[0m\n", + "\u001b[33mProduct_Manager\u001b[0m (to chat_manager):\n", + "\n", + "```python\n", + "from flaml import AutoML\n", + "\n", + "# Assuming psdf is the pandas-on-spark dataframe and label is the name of the target variable\n", + "# Presuming that the data conversion and feature vectorization have been done as shown in the context\n", "\n", - "# Initialize AutoML instance\n", "automl = AutoML()\n", "\n", - "# AutoML settings\n", - "automl_settings = {\n", - " \"time_budget\": 30, # Total running time in seconds\n", - " \"metric\": \"r2\", # Evaluation metric\n", - " \"task\": \"regression\",\n", - " \"n_concurrent_trials\": 2, # Number of concurrent Spark jobs\n", - " \"use_spark\": True, # Enable Spark for parallel training\n", - " \"force_cancel\": True, # Force cancel Spark jobs if they exceed the time budget\n", - " \"estimator_list\": [\"lgbm_spark\"] # Optional: Specific estimator to use\n", + "settings = {\n", + " \"time_budget\": 120, # for example, set the time budget to 2 minutes\n", + " \"metric\": \"accuracy\", # assuming a classification problem, change to \"r2\" for regression\n", + " \"estimator_list\": [\"lgbm_spark\"], # specify the Spark estimator\n", + " \"task\": \"classification\", # assuming a classification problem, change to \"regression\" for regression\n", + " \"n_concurrent_trials\": 2, # number of concurrent Spark jobs\n", + " \"use_spark\": True, # enable distributed training using Spark\n", "}\n", 
"\n", - "# Run AutoML fit with pandas-on-spark dataframe\n", - "automl.fit(\n", - " dataframe=psdf,\n", - " label=label,\n", - " **automl_settings,\n", - ")\n", + "automl.fit(dataframe=psdf, label=label, **settings)\n", + "```\n", + "Please adjust the `metric`, `task`, and other settings according to your specific problem and requirements. This code snippet sets up FLAML with Spark for parallel training using the LightGBM Spark estimator, with two concurrent trials. Make sure your Spark environment is properly configured to run the distributed training.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Senior_Python_Engineer\n", + "\u001b[0m\n", + "\u001b[33mSenior_Python_Engineer\u001b[0m (to chat_manager):\n", + "\n", + "```python\n", + "from flaml import AutoML\n", + "\n", + "# Assuming psdf is the pandas-on-spark dataframe and label is the name of the target variable\n", + "# Presuming that the data conversion and feature vectorization have been done as shown in the context\n", + "\n", + "automl = AutoML()\n", + "\n", + "settings = {\n", + " \"time_budget\": 120, # for example, set the time budget to 2 minutes\n", + " \"metric\": \"accuracy\", # assuming a classification problem, change to \"r2\" for regression\n", + " \"estimator_list\": [\"lgbm_spark\"], # specify the Spark estimator\n", + " \"task\": \"classification\", # assuming a classification problem, change to \"regression\" for regression\n", + " \"n_concurrent_trials\": 2, # number of concurrent Spark jobs\n", + " \"use_spark\": True, # enable distributed training using Spark\n", + "}\n", + "\n", + "automl.fit(dataframe=psdf, label=label, **settings)\n", + "```\n", + "Please adjust the `metric`, `task`, and other settings according to your specific problem and requirements. This code snippet sets up FLAML with Spark for parallel training using the LightGBM Spark estimator, with two concurrent trials. Make sure your Spark environment is properly configured to run the distributed training.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Code_Reviewer\n", + "\u001b[0m\n", + "\u001b[33mCode_Reviewer\u001b[0m (to chat_manager):\n", + "\n", + "The provided code snippet is mostly correct and follows the guidelines provided in the context. 
However, there is one minor issue: if we are using the pandas-on-spark DataFrame `psdf`, the `fit` method should be called with `dataframe` and `label` arguments, not `X_train` and `y_train`.\n", + "\n", + "This is because, with FLAML and Spark integration, the `fit` method expects the entire data as a single pandas-on-spark DataFrame along with the name of the target variable as `label`, rather than being provided with separate feature and target data as it would expect with standard pandas DataFrames.\n", + "\n", + "Here's the correct code snippet reflecting this:\n", + "\n", + "```python\n", + "from flaml import AutoML\n", + "\n", + "# Assuming psdf is the pandas-on-spark dataframe and label is the name of the target variable\n", + "# Presuming that the data conversion and feature vectorization have been done as shown in the context\n", + "\n", + "automl = AutoML()\n", + "\n", + "settings = {\n", + " \"time_budget\": 120, # for example, set the time budget to 2 minutes\n", + " \"metric\": \"accuracy\", # assuming a classification problem, change to \"r2\" for regression\n", + " \"estimator_list\": [\"lgbm_spark\"], # specify the Spark estimator\n", + " \"task\": \"classification\", # assuming a classification problem, change to \"regression\" for regression\n", + " \"n_concurrent_trials\": 2, # number of concurrent Spark jobs\n", + " \"use_spark\": True, # enable distributed training using Spark\n", + "}\n", + "\n", + "# Use dataframe and label parameters to fit the model\n", + "automl.fit(dataframe=psdf, label=label, **settings)\n", "```\n", + "\n", + "Please ensure that your Spark cluster is correctly configured to support distributed training, and adjust the `metric`, `task`, and other settings as needed for your specific use case.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Boss_Assistant\n", + "\u001b[0m\n", + "\u001b[33mBoss_Assistant\u001b[0m (to chat_manager):\n", + "\n", + "Reply `TERMINATE` if the task is done.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Product_Manager\n", + "\u001b[0m\n", + "\u001b[33mProduct_Manager\u001b[0m (to chat_manager):\n", + "\n", "TERMINATE\n", "\n", "--------------------------------------------------------------------------------\n" @@ -816,7 +1004,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -828,14 +1016,26 @@ "How to use spark for parallel training in FLAML? Give me sample code.\n", "\n", "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Product_Manager\n", + "\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "\u001b[33mProduct_Manager\u001b[0m (to chat_manager):\n", "\n", "\u001b[32m***** Suggested function call: retrieve_content *****\u001b[0m\n", "Arguments: \n", - "{\"message\":\"using Apache Spark for parallel training in FLAML with sample code\"}\n", + "{\"message\":\"How to use spark for parallel training in FLAML? 
Give me sample code.\",\"n_results\":3}\n", "\u001b[32m*****************************************************\u001b[0m\n", "\n", "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Boss\n", + "\u001b[0m\n", "\u001b[35m\n", ">>>>>>>> EXECUTING FUNCTION retrieve_content...\u001b[0m\n" ] @@ -844,16 +1044,19 @@ "name": "stderr", "output_type": "stream", "text": [ - "Number of requested results 3 is greater than number of elements in index 1, updating n_results = 1\n", - "Model gpt4-1106-preview not found. Using cl100k_base encoding.\n" + "2024-08-14 07:09:05,717 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - \u001b[32mUse the existing collection `groupchat`.\u001b[0m\n", + "2024-08-14 07:09:05,845 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - Found 2 chunks.\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "VectorDB returns doc_ids: [['bdfbc921']]\n", + "Trying to create collection.\n", + "VectorDB returns doc_ids: [['bdfbc921', 'b2c1ec51', '0e57e70f']]\n", "\u001b[32mAdding content of doc bdfbc921 to context.\u001b[0m\n", + "\u001b[32mAdding content of doc b2c1ec51 to context.\u001b[0m\n", + "\u001b[32mAdding content of doc 0e57e70f to context.\u001b[0m\n", "\u001b[33mBoss\u001b[0m (to chat_manager):\n", "\n", "\u001b[32m***** Response from calling function (retrieve_content) *****\u001b[0m\n", @@ -867,7 +1070,7 @@ "# your code\n", "```\n", "\n", - "User's question is: using Apache Spark for parallel training in FLAML with sample code\n", + "User's question is: How to use spark for parallel training in FLAML? Give me sample code.\n", "\n", "Context is: # Integrate - Spark\n", "\n", @@ -998,27 +1201,7 @@ "```\n", "\n", "[Link to notebook](https://github.com/microsoft/FLAML/blob/main/notebook/integrate_spark.ipynb) | [Open in colab](https://colab.research.google.com/github/microsoft/FLAML/blob/main/notebook/integrate_spark.ipynb)\n", - "\n", - "\n", - "\u001b[32m*************************************************************\u001b[0m\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33mBoss\u001b[0m (to chat_manager):\n", - "\n", - "\u001b[32m***** Response from calling function (retrieve_content) *****\u001b[0m\n", - "You're a retrieve augmented coding assistant. You answer user's questions based on your own knowledge and the\n", - "context provided by the user.\n", - "If you can't answer the question with or without the current context, you should reply exactly `UPDATE CONTEXT`.\n", - "For code generation, you must obey the following rules:\n", - "Rule 1. You MUST NOT install any packages because all the packages needed are already installed.\n", - "Rule 2. You must follow the formats below to write your code:\n", - "```language\n", - "# your code\n", - "```\n", - "\n", - "User's question is: using Apache Spark for parallel training in FLAML with sample code\n", - "\n", - "Context is: # Integrate - Spark\n", + "# Integrate - Spark\n", "\n", "FLAML has integrated Spark for distributed training. 
There are two main aspects of integration with Spark:\n", "\n", @@ -1094,7 +1277,6 @@ "import flaml\n", "\n", "# prepare your data in pandas-on-spark format as we previously mentioned\n", - "\n", "automl = flaml.AutoML()\n", "settings = {\n", " \"time_budget\": 30,\n", @@ -1152,84 +1334,87 @@ "\u001b[32m*************************************************************\u001b[0m\n", "\n", "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Product_Manager\n", + "\u001b[0m\n", "\u001b[33mProduct_Manager\u001b[0m (to chat_manager):\n", "\n", - "To use Apache Spark for parallel training in FLAML, you can follow these steps:\n", + "To use Spark for parallel training in FLAML, follow these steps:\n", "\n", - "1. Ensure your data is in the required pandas-on-spark format.\n", - "2. Use Spark ML estimators by including them in the `estimator_list`.\n", - "3. Set `use_spark` to `True` for parallel tuning.\n", + "## Steps:\n", "\n", - "Here's a sample code demonstrating how to use Spark for parallel training in FLAML:\n", + "1. **Prepare Your Data:**\n", + " Convert your data into a pandas-on-spark DataFrame using `to_pandas_on_spark` function.\n", + "\n", + "2. **Configure Spark Settings:**\n", + " Set the `use_spark` parameter to `True` to enable Spark for parallel training jobs.\n", + "\n", + "3. **Run the AutoML Experiment:**\n", + " Configure the AutoML settings and run the experiment.\n", + "\n", + "## Sample Code:\n", "\n", "```python\n", + "import pandas as pd\n", "import flaml\n", "from flaml.automl.spark.utils import to_pandas_on_spark\n", - "import pandas as pd\n", - "from pyspark.ml.feature import VectorAssembler\n", "\n", - "# Sample data in a pandas DataFrame\n", + "# Prepare your data\n", "data = {\n", " \"Square_Feet\": [800, 1200, 1800, 1500, 850],\n", " \"Age_Years\": [20, 15, 10, 7, 25],\n", " \"Price\": [100000, 200000, 300000, 240000, 120000],\n", "}\n", - "label = \"Price\"\n", "\n", - "# Creating a pandas DataFrame\n", "dataframe = pd.DataFrame(data)\n", + "label = \"Price\"\n", "\n", "# Convert to pandas-on-spark dataframe\n", "psdf = to_pandas_on_spark(dataframe)\n", "\n", - "# Prepare features using VectorAssembler\n", + "# Use VectorAssembler to format data for Spark ML\n", + "from pyspark.ml.feature import VectorAssembler\n", + "\n", "columns = psdf.columns\n", "feature_cols = [col for col in columns if col != label]\n", "featurizer = VectorAssembler(inputCols=feature_cols, outputCol=\"features\")\n", "psdf = featurizer.transform(psdf.to_spark(index_col=\"index\"))[\"index\", \"features\"]\n", "\n", - "# Initialize AutoML\n", + "# Configure AutoML settings\n", "automl = flaml.AutoML()\n", - "\n", - "# Configure settings for AutoML\n", - "settings = {\n", - " \"time_budget\": 30, # time budget in seconds\n", + "automl_settings = {\n", + " \"time_budget\": 30,\n", " \"metric\": \"r2\",\n", - " \"estimator_list\": [\"lgbm_spark\"], # using Spark ML estimators\n", " \"task\": \"regression\",\n", - " \"n_concurrent_trials\": 2, # number of parallel trials\n", - " \"use_spark\": True, # enable parallel training using Spark\n", - " \"force_cancel\": True, # force cancel Spark jobs if time_budget is exceeded\n", + " \"n_concurrent_trials\": 2,\n", + " \"use_spark\": True,\n", + " \"force_cancel\": True, # Optionally force cancel jobs that exceed time budget\n", "}\n", "\n", - "# Start the training\n", - "automl.fit(dataframe=psdf, label=label, **settings)\n", + "# Run the AutoML experiment\n", + "automl.fit(\n", + " 
dataframe=psdf,\n", + " label=label,\n", + " **automl_settings,\n", + ")\n", "```\n", "\n", - "In this code snippet:\n", - "- The `to_pandas_on_spark` function is used to convert the pandas DataFrame to a pandas-on-spark DataFrame.\n", - "- `VectorAssembler` is used to transform feature columns into a single vector column.\n", - "- The `AutoML` object is created, and settings are configured for the AutoML run, including setting `use_spark` to `True` for parallel training.\n", - "- The `fit` method is called to start the automated machine learning process.\n", + "This code demonstrates how to prepare your data, configure Spark settings for parallel training, and run the AutoML experiment using FLAML with Spark.\n", "\n", - "By using these settings, FLAML will train the models in parallel using Spark, which can accelerate the training process on large models and datasets.\n", + "You can find more information and examples in the [FLAML documentation](https://github.com/microsoft/FLAML/blob/main/notebook/integrate_spark.ipynb).\n", "\n", "TERMINATE\n", "\n", - "--------------------------------------------------------------------------------\n" + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Senior_Python_Engineer\n", + "\u001b[0m\n" ] } ], "source": [ "call_rag_chat()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -1256,7 +1441,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.12.4" } }, "nbformat": 4, diff --git a/notebook/agentchat_microsoft_fabric.ipynb b/notebook/agentchat_microsoft_fabric.ipynb index 97cab73b4eaa..8e128d733e6d 100644 --- a/notebook/agentchat_microsoft_fabric.ipynb +++ b/notebook/agentchat_microsoft_fabric.ipynb @@ -20,7 +20,7 @@ "\n", "In this notebook, we demonstrate several examples:\n", "- 1. How to use `AssistantAgent` and `UserProxyAgent` to write code and execute the code.\n", - "- 2. How to use `RetrieveAssistantAgent` and `RetrieveUserProxyAgent` to do Retrieval Augmented Generation (RAG) for QA and Code Generation.\n", + "- 2. How to use `AssistantAgent` and `RetrieveUserProxyAgent` to do Retrieval Augmented Generation (RAG) for QA and Code Generation.\n", "- 3. How to use `MultimodalConversableAgent` to chat with images.\n", "\n", "### Requirements\n", @@ -139,6 +139,7 @@ " }\n", " return config_list, llm_config\n", "\n", + "\n", "config_list, llm_config = get_config_list()\n", "\n", "assert len(config_list) > 0\n", @@ -401,7 +402,7 @@ }, "source": [ "### Example 2\n", - "How to use `RetrieveAssistantAgent` and `RetrieveUserProxyAgent` to do Retrieval Augmented Generation (RAG) for QA and Code Generation.\n", + "How to use `AssistantAgent` and `RetrieveUserProxyAgent` to do Retrieval Augmented Generation (RAG) for QA and Code Generation.\n", "\n", "Check out this [blog](https://microsoft.github.io/autogen/blog/2023/10/18/RetrieveChat) for more details." ] @@ -479,11 +480,11 @@ }, "outputs": [], "source": [ - "from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent\n", + "from autogen import AssistantAgent\n", "from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent\n", "\n", - "# 1. create an RetrieveAssistantAgent instance named \"assistant\"\n", - "assistant = RetrieveAssistantAgent(\n", + "# 1. 
create an AssistantAgent instance named \"assistant\"\n", + "assistant = AssistantAgent(\n", " name=\"assistant\",\n", " system_message=\"You are a helpful assistant.\",\n", " llm_config=llm_config,\n", diff --git a/test/agentchat/contrib/retrievechat/test_pgvector_retrievechat.py b/test/agentchat/contrib/retrievechat/test_pgvector_retrievechat.py index ca24f952f76d..3c566352b3e2 100644 --- a/test/agentchat/contrib/retrievechat/test_pgvector_retrievechat.py +++ b/test/agentchat/contrib/retrievechat/test_pgvector_retrievechat.py @@ -6,8 +6,7 @@ import pytest from sentence_transformers import SentenceTransformer -from autogen import config_list_from_json -from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent +from autogen import AssistantAgent, config_list_from_json sys.path.append(os.path.join(os.path.dirname(__file__), "../../..")) from conftest import skip_openai # noqa: E402 @@ -18,9 +17,6 @@ try: import pgvector - from autogen.agentchat.contrib.retrieve_assistant_agent import ( - RetrieveAssistantAgent, - ) from autogen.agentchat.contrib.retrieve_user_proxy_agent import ( RetrieveUserProxyAgent, ) @@ -46,7 +42,7 @@ def test_retrievechat(): file_location=KEY_LOC, ) - assistant = RetrieveAssistantAgent( + assistant = AssistantAgent( name="assistant", system_message="You are a helpful assistant.", llm_config={ diff --git a/test/agentchat/contrib/retrievechat/test_qdrant_retrievechat.py b/test/agentchat/contrib/retrievechat/test_qdrant_retrievechat.py index 85f098c64b1b..92ca5aa603a9 100755 --- a/test/agentchat/contrib/retrievechat/test_qdrant_retrievechat.py +++ b/test/agentchat/contrib/retrievechat/test_qdrant_retrievechat.py @@ -5,8 +5,7 @@ import pytest -from autogen import config_list_from_json -from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent +from autogen import AssistantAgent, config_list_from_json sys.path.append(os.path.join(os.path.dirname(__file__), "../../..")) from conftest import skip_openai # noqa: E402 @@ -51,7 +50,7 @@ def test_retrievechat(): file_location=KEY_LOC, ) - assistant = RetrieveAssistantAgent( + assistant = AssistantAgent( name="assistant", system_message="You are a helpful assistant.", llm_config={ diff --git a/test/agentchat/contrib/retrievechat/test_retrievechat.py b/test/agentchat/contrib/retrievechat/test_retrievechat.py index ceb973577859..0504fc82be42 100755 --- a/test/agentchat/contrib/retrievechat/test_retrievechat.py +++ b/test/agentchat/contrib/retrievechat/test_retrievechat.py @@ -18,9 +18,7 @@ import openai from chromadb.utils import embedding_functions as ef - from autogen.agentchat.contrib.retrieve_assistant_agent import ( - RetrieveAssistantAgent, - ) + from autogen import AssistantAgent from autogen.agentchat.contrib.retrieve_user_proxy_agent import ( RetrieveUserProxyAgent, ) @@ -45,7 +43,7 @@ def test_retrievechat(): file_location=KEY_LOC, ) - assistant = RetrieveAssistantAgent( + assistant = AssistantAgent( name="assistant", system_message="You are a helpful assistant.", llm_config={ diff --git a/website/blog/2023-10-18-RetrieveChat/index.mdx b/website/blog/2023-10-18-RetrieveChat/index.mdx index 12ee03051321..91b8b5012a3b 100644 --- a/website/blog/2023-10-18-RetrieveChat/index.mdx +++ b/website/blog/2023-10-18-RetrieveChat/index.mdx @@ -4,12 +4,12 @@ authors: thinkall tags: [LLM, RAG] --- -*Last update: April 4, 2024; AutoGen version: v0.2.21* +*Last update: August 14, 2024; AutoGen version: v0.2.35* ![RAG Architecture](img/retrievechat-arch.png) **TL;DR:** -* We introduce 
**RetrieveUserProxyAgent** and **RetrieveAssistantAgent**, RAG agents of AutoGen that
+* We introduce **RetrieveUserProxyAgent**, a RAG agent of AutoGen that
 allows retrieval-augmented generation, and its basic usage.
 * We showcase customizations of RAG agents, such as customizing the embedding function,
 the text split function and vector database.
@@ -21,8 +21,9 @@ application with Gradio.
 Retrieval augmentation has emerged as a practical and effective approach for mitigating the intrinsic
 limitations of LLMs by incorporating external documents. In this blog post, we introduce RAG agents of
 AutoGen that allows retrieval-augmented generation. The system consists of two agents: a
-Retrieval-augmented User Proxy agent, called `RetrieveUserProxyAgent`, and a Retrieval-augmented Assistant
-agent, called `RetrieveAssistantAgent`, both of which are extended from built-in agents from AutoGen.
+Retrieval-augmented User Proxy agent, called `RetrieveUserProxyAgent`, and an Assistant
+agent, called `RetrieveAssistantAgent`; `RetrieveUserProxyAgent` is extended from built-in agents from AutoGen,
+while `RetrieveAssistantAgent` can be any conversable agent with an LLM configured.
 The overall architecture of the RAG agents is shown in the figure above.

 To use Retrieval-augmented Chat, one needs to initialize two agents including Retrieval-augmented
@@ -75,13 +76,17 @@ You can find a list of all supported document types by using `autogen.retrieve_u
 1. Import Agents
 ```python
 import autogen
-from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent
+from autogen import AssistantAgent
 from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent
 ```
-2. Create an ‘RetrieveAssistantAgent’ instance named “assistant” and an ‘RetrieveUserProxyAgent’ instance named “ragproxyagent”
+2. Create an ‘AssistantAgent’ instance named “assistant” and a ‘RetrieveUserProxyAgent’ instance named “ragproxyagent”
+
+Refer to the [doc](https://microsoft.github.io/autogen/docs/reference/agentchat/contrib/retrieve_user_proxy_agent)
+for more information on the detailed configurations.
+
 ```python
-assistant = RetrieveAssistantAgent(
+assistant = AssistantAgent(
     name="assistant",
     system_message="You are a helpful assistant.",
     llm_config=llm_config,
@@ -195,93 +200,12 @@ ragproxyagent = RetrieveUserProxyAgent(
 ### Customizing Vector Database

-We are using chromadb as the default vector database, you can also replace it with any other vector database
-by simply overriding the function `retrieve_docs` of `RetrieveUserProxyAgent`.
-
-For example, you can use Qdrant as below:
-
-```python
-# Creating qdrant client
-from qdrant_client import QdrantClient
-
-client = QdrantClient(url="***", api_key="***")
-
-# Wrapping RetrieveUserProxyAgent
-from litellm import embedding as test_embedding
-from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent
-from qdrant_client.models import SearchRequest, Filter, FieldCondition, MatchText
-
-class QdrantRetrieveUserProxyAgent(RetrieveUserProxyAgent):
-    def query_vector_db(
-        self,
-        query_texts: List[str],
-        n_results: int = 10,
-        search_string: str = "",
-        **kwargs,
-    ) -> Dict[str, Union[List[str], List[List[str]]]]:
-        # define your own query function here
-        embed_response = test_embedding('text-embedding-ada-002', input=query_texts)
-
-        all_embeddings: List[List[float]] = []
-
-        for item in embed_response['data']:
-            all_embeddings.append(item['embedding'])
-
-        search_queries: List[SearchRequest] = []
-
-        for embedding in all_embeddings:
-            search_queries.append(
-                SearchRequest(
-                    vector=embedding,
-                    filter=Filter(
-                        must=[
-                            FieldCondition(
-                                key="page_content",
-                                match=MatchText(
-                                    text=search_string,
-                                )
-                            )
-                        ]
-                    ),
-                    limit=n_results,
-                    with_payload=True,
-                )
-            )
-
-        search_response = client.search_batch(
-            collection_name="{your collection name}",
-            requests=search_queries,
-        )
-
-        return {
-            "ids": [[scored_point.id for scored_point in batch] for batch in search_response],
-            "documents": [[scored_point.payload.get('page_content', '') for scored_point in batch] for batch in search_response],
-            "metadatas": [[scored_point.payload.get('metadata', {}) for scored_point in batch] for batch in search_response]
-        }
-
-    def retrieve_docs(self, problem: str, n_results: int = 20, search_string: str = "", **kwargs):
-        results = self.query_vector_db(
-            query_texts=[problem],
-            n_results=n_results,
-            search_string=search_string,
-            **kwargs,
-        )
-
-        self._results = results
+We are using chromadb as the default vector database; you can also use mongodb, pgvectordb and qdrantdb
+by simply setting `vector_db` to `mongodb`, `pgvector` and `qdrant` in `retrieve_config`, respectively.
+To plug in any other DBs, you can also extend the class `agentchat.contrib.vectordb.base`;
+check out the code [here](https://github.com/microsoft/autogen/blob/main/autogen/agentchat/contrib/vectordb/base.py).
-
-# Use QdrantRetrieveUserProxyAgent
-qdrantragagent = QdrantRetrieveUserProxyAgent(
-    name="ragproxyagent",
-    human_input_mode="NEVER",
-    max_consecutive_auto_reply=2,
-    retrieve_config={
-        "task": "qa",
-    },
-)
-
-qdrantragagent.retrieve_docs("What is Autogen?", n_results=10, search_string="autogen")
-```

 ## Advanced Usage of RAG Agents
 ### Integrate with other agents in a group chat
@@ -340,15 +264,9 @@ def retrieve_content(
     n_results: Annotated[int, "number of results"] = 3,
 ) -> str:
     boss_aid.n_results = n_results # Set the number of results to be retrieved.
-    # Check if we need to update the context.
- update_context_case1, update_context_case2 = boss_aid._check_update_context(message) - if (update_context_case1 or update_context_case2) and boss_aid.update_context: - boss_aid.problem = message if not hasattr(boss_aid, "problem") else boss_aid.problem - _, ret_msg = boss_aid._generate_retrieve_user_reply(message) - else: - _context = {"problem": message, "n_results": n_results} - ret_msg = boss_aid.message_generator(boss_aid, None, _context) - return ret_msg if ret_msg else message + _context = {"problem": message, "n_results": n_results} + ret_msg = boss_aid.message_generator(boss_aid, None, _context) + return ret_msg or message for caller in [pm, coder, reviewer]: d_retrieve_content = caller.register_for_llm( @@ -483,4 +401,6 @@ The online app and the source code are hosted in [HuggingFace](https://huggingfa You can check out more example notebooks for RAG use cases: - [Automated Code Generation and Question Answering with Retrieval Augmented Agents](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_RetrieveChat.ipynb) - [Group Chat with Retrieval Augmented Generation (with 5 group member agents and 1 manager agent)](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_groupchat_RAG.ipynb) -- [Automated Code Generation and Question Answering with Qdrant based Retrieval Augmented Agents](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_RetrieveChat_qdrant.ipynb) +- [Using RetrieveChat with Qdrant for Retrieve Augmented Code Generation and Question Answering](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_RetrieveChat_qdrant.ipynb) +- [Using RetrieveChat Powered by PGVector for Retrieve Augmented Code Generation and Question Answering](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_RetrieveChat_pgvector.ipynb) +- [Using RetrieveChat Powered by MongoDB Atlas for Retrieve Augmented Code Generation and Question Answering](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_RetrieveChat_mongodb.ipynb) diff --git a/website/docs/topics/non-openai-models/cloud-gemini.ipynb b/website/docs/topics/non-openai-models/cloud-gemini.ipynb index 70dc808df616..a227582c592c 100644 --- a/website/docs/topics/non-openai-models/cloud-gemini.ipynb +++ b/website/docs/topics/non-openai-models/cloud-gemini.ipynb @@ -94,7 +94,6 @@ "from autogen import Agent, AssistantAgent, ConversableAgent, UserProxyAgent\n", "from autogen.agentchat.contrib.img_utils import _to_pil, get_image_data\n", "from autogen.agentchat.contrib.multimodal_conversable_agent import MultimodalConversableAgent\n", - "from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent\n", "from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent\n", "from autogen.code_utils import DEFAULT_MODEL, UNKNOWN, content_str, execute_code, extract_code, infer_lang" ] diff --git a/website/docs/topics/non-openai-models/cloud-gemini_vertexai.ipynb b/website/docs/topics/non-openai-models/cloud-gemini_vertexai.ipynb index e618966dc6cc..545f97a2971b 100644 --- a/website/docs/topics/non-openai-models/cloud-gemini_vertexai.ipynb +++ b/website/docs/topics/non-openai-models/cloud-gemini_vertexai.ipynb @@ -181,7 +181,6 @@ "from autogen import Agent, AssistantAgent, ConversableAgent, UserProxyAgent\n", "from autogen.agentchat.contrib.img_utils import _to_pil, get_image_data\n", "from autogen.agentchat.contrib.multimodal_conversable_agent import MultimodalConversableAgent\n", - "from 
autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent\n", "from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent\n", "from autogen.code_utils import DEFAULT_MODEL, UNKNOWN, content_str, execute_code, extract_code, infer_lang" ] @@ -391,11 +390,6 @@ ".\"\"\",\n", ")" ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] } ], "metadata": { diff --git a/website/docs/topics/retrieval_augmentation.md b/website/docs/topics/retrieval_augmentation.md index 366893cb9825..ebb09f0627e6 100644 --- a/website/docs/topics/retrieval_augmentation.md +++ b/website/docs/topics/retrieval_augmentation.md @@ -2,16 +2,20 @@ Retrieval Augmented Generation (RAG) is a powerful technique that combines language models with external knowledge retrieval to improve the quality and relevance of generated responses. -One way to realize RAG in AutoGen is to construct agent chats with `RetrieveAssistantAgent` and `RetrieveUserProxyAgent` classes. +One way to realize RAG in AutoGen is to construct agent chats with `AssistantAgent` and `RetrieveUserProxyAgent` classes. ## Example Setup: RAG with Retrieval Augmented Agents The following is an example setup demonstrating how to create retrieval augmented agents in AutoGen: -### Step 1. Create an instance of `RetrieveAssistantAgent` and `RetrieveUserProxyAgent`. +### Step 1. Create an instance of `AssistantAgent` and `RetrieveUserProxyAgent`. Here `RetrieveUserProxyAgent` instance acts as a proxy agent that retrieves relevant information based on the user's input. + +Refer to the [doc](https://microsoft.github.io/autogen/docs/reference/agentchat/contrib/retrieve_user_proxy_agent) +for more information on the detailed configurations. + ```python -assistant = RetrieveAssistantAgent( +assistant = AssistantAgent( name="assistant", system_message="You are a helpful assistant.", llm_config={ @@ -56,14 +60,14 @@ ragproxyagent.initiate_chat( ## Example Setup: RAG with Retrieval Augmented Agents with PGVector The following is an example setup demonstrating how to create retrieval augmented agents in AutoGen: -### Step 1. Create an instance of `RetrieveAssistantAgent` and `RetrieveUserProxyAgent`. +### Step 1. Create an instance of `AssistantAgent` and `RetrieveUserProxyAgent`. Here `RetrieveUserProxyAgent` instance acts as a proxy agent that retrieves relevant information based on the user's input. Specify the connection_string, or the host, port, database, username, and password in the db_config. ```python -assistant = RetrieveAssistantAgent( +assistant = AssistantAgent( name="assistant", system_message="You are a helpful assistant.", llm_config={