From 5221c3eb9b6735c14bf67a8ed92992ca00dc90d5 Mon Sep 17 00:00:00 2001 From: Wael Karkoub Date: Tue, 2 Apr 2024 20:36:22 +0100 Subject: [PATCH 01/12] wip --- autogen/oai/client.py | 42 +++++++++++++++++++++++++--------- autogen/oai/rate_limiter.py | 43 +++++++++++++++++++++++++++++++++++ test/oai/test_client.py | 28 +++++++++++++++++++++++ test/oai/test_rate_limiter.py | 34 +++++++++++++++++++++++++++ 4 files changed, 136 insertions(+), 11 deletions(-) create mode 100644 autogen/oai/rate_limiter.py create mode 100644 test/oai/test_rate_limiter.py diff --git a/autogen/oai/client.py b/autogen/oai/client.py index f288ece39610..0143e3788214 100644 --- a/autogen/oai/client.py +++ b/autogen/oai/client.py @@ -1,22 +1,22 @@ from __future__ import annotations -import sys -from typing import Any, List, Optional, Dict, Callable, Tuple, Union -import logging import inspect +import logging +import sys import uuid -from flaml.automl.logger import logger_formatter +from typing import Any, Callable, Dict, List, Optional, Protocol, Tuple, Union +from flaml.automl.logger import logger_formatter from pydantic import BaseModel -from typing import Protocol from autogen.cache import Cache from autogen.io.base import IOStream -from autogen.oai.openai_utils import get_key, is_valid_api_key, OAI_PRICE1K +from autogen.logger.logger_utils import get_current_ts +from autogen.oai.openai_utils import OAI_PRICE1K, get_key, is_valid_api_key +from autogen.runtime_logging import log_chat_completion, log_new_client, log_new_wrapper, logging_enabled from autogen.token_count_utils import count_token -from autogen.runtime_logging import logging_enabled, log_chat_completion, log_new_client, log_new_wrapper -from autogen.logger.logger_utils import get_current_ts +from .rate_limiter import TimeRateLimiter TOOL_ENABLED = False try: @@ -27,14 +27,15 @@ AzureOpenAI = object else: # raises exception if openai>=1 is installed and something is wrong with imports - from openai import OpenAI, AzureOpenAI, APIError, APITimeoutError, __version__ as OPENAIVERSION + from openai import APIError, APITimeoutError, AzureOpenAI, OpenAI + from openai import __version__ as OPENAIVERSION from openai.resources import Completions from openai.types.chat import ChatCompletion from openai.types.chat.chat_completion import ChatCompletionMessage, Choice # type: ignore [attr-defined] from openai.types.chat.chat_completion_chunk import ( + ChoiceDeltaFunctionCall, ChoiceDeltaToolCall, ChoiceDeltaToolCallFunction, - ChoiceDeltaFunctionCall, ) from openai.types.completion import Completion from openai.types.completion_usage import CompletionUsage @@ -159,7 +160,9 @@ def create(self, params: Dict[str, Any]) -> ChatCompletion: """ iostream = IOStream.get_default() - completions: Completions = self._oai_client.chat.completions if "messages" in params else self._oai_client.completions # type: ignore [attr-defined] + completions: Completions = ( + self._oai_client.chat.completions if "messages" in params else self._oai_client.completions + ) # type: ignore [attr-defined] # If streaming is enabled and has messages, then iterate over the chunks of the response. 
if params.get("stream", False) and "messages" in params: response_contents = [""] * params.get("n", 1) @@ -368,10 +371,18 @@ def __init__(self, *, config_list: Optional[List[Dict[str, Any]]] = None, **base self._clients: List[ModelClient] = [] self._config_list: List[Dict[str, Any]] = [] + self._rate_limiters: List[Optional[TimeRateLimiter]] = [] if config_list: config_list = [config.copy() for config in config_list] # make a copy before modifying for config in config_list: + # Instantiate the rate limiter + if config.get("api_rate_limit") is not None: + self._rate_limiters.append(TimeRateLimiter(config["api_rate_limit"])) + del config["api_rate_limit"] + else: + self._rate_limiters.append(None) + self._register_default_client(config, openai_config) # could modify the config self._config_list.append( {**extra_kwargs, **{k: v for k, v in config.items() if k not in self.openai_kwargs}} @@ -622,6 +633,7 @@ def yes_or_no_filter(context, response): return response continue # filter is not passed; try the next config try: + self._throttle_api_calls(i) request_ts = get_current_ts() response = client.create(params) except APITimeoutError as err: @@ -897,3 +909,11 @@ def extract_text_or_completion_object( A list of text, or a list of ChatCompletion objects if function_call/tool_calls are present. """ return response.message_retrieval_function(response) + + def _throttle_api_calls(self, idx: int) -> None: + """Rate limit api calls.""" + if self._rate_limiters[idx]: + limiter = self._rate_limiters[idx] + + assert limiter is not None + limiter.wait() diff --git a/autogen/oai/rate_limiter.py b/autogen/oai/rate_limiter.py new file mode 100644 index 000000000000..aeff279a0c69 --- /dev/null +++ b/autogen/oai/rate_limiter.py @@ -0,0 +1,43 @@ +import time +import asyncio + + +class TimeRateLimiter: + """A class to implement a time-based rate limiter. + + This rate limiter ensures that a certain operation does not exceed a specified frequency. + It can be used to limit the rate of requests sent to a server or the rate of any repeated action. + """ + + def __init__(self, rate: float): + """ + Args: + rate (int): The frequency of the time-based rate limiter (NOT time). + """ + self._time_interval_seconds = 1.0 / rate + self._last_time_called = 0.0 + + def wait(self): + """Synchronously waits until enough time has passed to allow the next operation. + + If the elapsed time since the last operation is less than the required time interval, + this method will block the execution by sleeping for the remaining time. + """ + if self._elapsed_time() < self._time_interval_seconds: + time.sleep(self._time_interval_seconds - self._elapsed_time()) + + self._last_time_called = time.perf_counter() + + async def a_wait(self): + """Asynchronously waits until enough time has passed to allow the next operation. + + If the elapsed time since the last operation is less than the required time interval, + this method will asynchronously sleep for the remaining time, allowing other tasks to run. 
+ """ + if self._elapsed_time() < self._time_interval_seconds: + await asyncio.sleep(self._time_interval_seconds - self._elapsed_time()) + + self._last_time_called = time.perf_counter() + + def _elapsed_time(self): + return time.perf_counter() - self._last_time_called diff --git a/test/oai/test_client.py b/test/oai/test_client.py index 10117506cca3..4b2967ba86aa 100755 --- a/test/oai/test_client.py +++ b/test/oai/test_client.py @@ -297,6 +297,33 @@ def test_cache(): assert not os.path.exists(os.path.join(cache_dir, str(LEGACY_DEFAULT_CACHE_SEED))) +@pytest.mark.skipif(skip_openai, reason="Requested to skip openai tests.") +def test_throttled_api_calls(): + # config_list = config_list_from_json( + # env_or_file=OAI_CONFIG_LIST, + # file_location=KEY_LOC, + # filter_dict={"model": ["gpt-3.5-turbo"]}, + # ) + config_list = [{"model": "gpt-3.5-turbo", "api_key": os.environ.get("OPENAI_API_KEY")}] + + # Api calling limited at 0.2 request per second, or 1 request per 5 seconds + rate = 1 / 5.0 + + # Adding a timeout to catch false positives + config_list[0]["timeout"] = 1 / rate + config_list[0]["api_rate_limit"] = rate + + client = OpenAIWrapper(config_list=config_list, cache_seed=None) + + n_loops = 2 + current_time = time.perf_counter() + for _ in range(n_loops): + client.create(messages=[{"role": "user", "content": "hello"}]) + + min_expected_time = (n_loops - 1) / rate + assert time.perf_counter() - current_time > min_expected_time + + if __name__ == "__main__": # test_aoai_chat_completion() # test_oai_tool_calling_extraction() @@ -306,3 +333,4 @@ def test_cache(): # test_usage_summary() test_legacy_cache() test_cache() + test_throttled_api_calls() diff --git a/test/oai/test_rate_limiter.py b/test/oai/test_rate_limiter.py new file mode 100644 index 000000000000..5659aa005538 --- /dev/null +++ b/test/oai/test_rate_limiter.py @@ -0,0 +1,34 @@ +import pytest +import time +from autogen.oai.rate_limiter import TimeRateLimiter + + +def test_time_rate_limiter(): + current_time_seconds = time.perf_counter() + + rate = 1 + rate_limiter = TimeRateLimiter(rate) + + n_loops = 2 + for _ in range(n_loops): + rate_limiter.wait() + + total_time = time.perf_counter() - current_time_seconds + min_expected_time = (n_loops - 1) / rate + assert total_time >= min_expected_time + + +@pytest.mark.asyncio +async def test_a_time_rate_limiter(): + current_time_seconds = time.perf_counter() + + rate = 1 + rate_limiter = TimeRateLimiter(rate) + + n_loops = 2 + for _ in range(n_loops): + await rate_limiter.a_wait() + + total_time = time.perf_counter() - current_time_seconds + min_expected_time = (n_loops - 1) / rate + assert total_time >= min_expected_time From a7f40f254299ee4b6f9803c598c0c65d32580041 Mon Sep 17 00:00:00 2001 From: Wael Karkoub Date: Fri, 5 Apr 2024 02:42:13 +0100 Subject: [PATCH 02/12] remove async --- autogen/oai/rate_limiter.py | 14 +------------- test/oai/test_rate_limiter.py | 16 ---------------- 2 files changed, 1 insertion(+), 29 deletions(-) diff --git a/autogen/oai/rate_limiter.py b/autogen/oai/rate_limiter.py index aeff279a0c69..4ef0bd46d672 100644 --- a/autogen/oai/rate_limiter.py +++ b/autogen/oai/rate_limiter.py @@ -1,5 +1,4 @@ import time -import asyncio class TimeRateLimiter: @@ -12,7 +11,7 @@ class TimeRateLimiter: def __init__(self, rate: float): """ Args: - rate (int): The frequency of the time-based rate limiter (NOT time). + rate (int): The frequency of the time-based rate limiter (NOT time interval). 
""" self._time_interval_seconds = 1.0 / rate self._last_time_called = 0.0 @@ -28,16 +27,5 @@ def wait(self): self._last_time_called = time.perf_counter() - async def a_wait(self): - """Asynchronously waits until enough time has passed to allow the next operation. - - If the elapsed time since the last operation is less than the required time interval, - this method will asynchronously sleep for the remaining time, allowing other tasks to run. - """ - if self._elapsed_time() < self._time_interval_seconds: - await asyncio.sleep(self._time_interval_seconds - self._elapsed_time()) - - self._last_time_called = time.perf_counter() - def _elapsed_time(self): return time.perf_counter() - self._last_time_called diff --git a/test/oai/test_rate_limiter.py b/test/oai/test_rate_limiter.py index 5659aa005538..2e2a81a0d866 100644 --- a/test/oai/test_rate_limiter.py +++ b/test/oai/test_rate_limiter.py @@ -16,19 +16,3 @@ def test_time_rate_limiter(): total_time = time.perf_counter() - current_time_seconds min_expected_time = (n_loops - 1) / rate assert total_time >= min_expected_time - - -@pytest.mark.asyncio -async def test_a_time_rate_limiter(): - current_time_seconds = time.perf_counter() - - rate = 1 - rate_limiter = TimeRateLimiter(rate) - - n_loops = 2 - for _ in range(n_loops): - await rate_limiter.a_wait() - - total_time = time.perf_counter() - current_time_seconds - min_expected_time = (n_loops - 1) / rate - assert total_time >= min_expected_time From 2106e1d62e073184df7393fb8487a488afc41f7f Mon Sep 17 00:00:00 2001 From: Wael Karkoub Date: Fri, 5 Apr 2024 02:56:24 +0100 Subject: [PATCH 03/12] minor fix --- autogen/oai/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autogen/oai/client.py b/autogen/oai/client.py index 0143e3788214..fdd7aca647de 100644 --- a/autogen/oai/client.py +++ b/autogen/oai/client.py @@ -377,7 +377,7 @@ def __init__(self, *, config_list: Optional[List[Dict[str, Any]]] = None, **base config_list = [config.copy() for config in config_list] # make a copy before modifying for config in config_list: # Instantiate the rate limiter - if config.get("api_rate_limit") is not None: + if "api_rate_limit" in config: self._rate_limiters.append(TimeRateLimiter(config["api_rate_limit"])) del config["api_rate_limit"] else: From 67a05fcc5acd7df4f8a89e942db914992dfe6cde Mon Sep 17 00:00:00 2001 From: Wael Karkoub Date: Fri, 5 Apr 2024 03:15:19 +0100 Subject: [PATCH 04/12] robust timing test --- test/oai/test_rate_limiter.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/test/oai/test_rate_limiter.py b/test/oai/test_rate_limiter.py index 2e2a81a0d866..ed9677bf92e5 100644 --- a/test/oai/test_rate_limiter.py +++ b/test/oai/test_rate_limiter.py @@ -1,10 +1,12 @@ -import pytest import time +import pytest + from autogen.oai.rate_limiter import TimeRateLimiter -def test_time_rate_limiter(): - current_time_seconds = time.perf_counter() +@pytest.mark.parametrize("execution_number", range(5)) +def test_time_rate_limiter(execution_number): + current_time_seconds = time.time() rate = 1 rate_limiter = TimeRateLimiter(rate) @@ -13,6 +15,6 @@ def test_time_rate_limiter(): for _ in range(n_loops): rate_limiter.wait() - total_time = time.perf_counter() - current_time_seconds + total_time = time.time() - current_time_seconds min_expected_time = (n_loops - 1) / rate assert total_time >= min_expected_time From c0b5c550459b2481c4f3d563efd0bbab2b16654d Mon Sep 17 00:00:00 2001 From: Wael Karkoub Date: Mon, 15 Apr 2024 19:42:25 +0100 Subject: [PATCH 05/12] 
modifies file names --- autogen/oai/client.py | 2 +- autogen/oai/{rate_limiter.py => rate_limiters.py} | 0 test/oai/test_client.py | 4 ++-- test/oai/{test_rate_limiter.py => test_rate_limiters.py} | 7 ++++--- 4 files changed, 7 insertions(+), 6 deletions(-) rename autogen/oai/{rate_limiter.py => rate_limiters.py} (100%) rename test/oai/{test_rate_limiter.py => test_rate_limiters.py} (68%) diff --git a/autogen/oai/client.py b/autogen/oai/client.py index 847b5e2899b6..a644014027d9 100644 --- a/autogen/oai/client.py +++ b/autogen/oai/client.py @@ -16,7 +16,7 @@ from autogen.runtime_logging import log_chat_completion, log_new_client, log_new_wrapper, logging_enabled from autogen.token_count_utils import count_token -from .rate_limiter import TimeRateLimiter +from .rate_limiters import TimeRateLimiter TOOL_ENABLED = False try: diff --git a/autogen/oai/rate_limiter.py b/autogen/oai/rate_limiters.py similarity index 100% rename from autogen/oai/rate_limiter.py rename to autogen/oai/rate_limiters.py diff --git a/test/oai/test_client.py b/test/oai/test_client.py index 703103a62442..064af3a6a983 100755 --- a/test/oai/test_client.py +++ b/test/oai/test_client.py @@ -12,7 +12,7 @@ from autogen.oai.client import LEGACY_CACHE_DIR, LEGACY_DEFAULT_CACHE_SEED sys.path.append(os.path.join(os.path.dirname(__file__), "..")) -from conftest import skip_openai # noqa: E402 +from conftest import reason, skip_openai # noqa: E402 TOOL_ENABLED = False try: @@ -299,7 +299,7 @@ def test_cache(): assert not os.path.exists(os.path.join(cache_dir, str(LEGACY_DEFAULT_CACHE_SEED))) -@pytest.mark.skipif(skip_openai, reason="Requested to skip openai tests.") +@pytest.mark.skipif(skip_openai, reason=reason) def test_throttled_api_calls(): # config_list = config_list_from_json( # env_or_file=OAI_CONFIG_LIST, diff --git a/test/oai/test_rate_limiter.py b/test/oai/test_rate_limiters.py similarity index 68% rename from test/oai/test_rate_limiter.py rename to test/oai/test_rate_limiters.py index ed9677bf92e5..cf40270e3c4c 100644 --- a/test/oai/test_rate_limiter.py +++ b/test/oai/test_rate_limiters.py @@ -1,11 +1,12 @@ import time + import pytest -from autogen.oai.rate_limiter import TimeRateLimiter +from autogen.oai.rate_limiters import TimeRateLimiter -@pytest.mark.parametrize("execution_number", range(5)) -def test_time_rate_limiter(execution_number): +@pytest.mark.parametrize("execute_n_times", range(5)) +def test_time_rate_limiter(execute_n_times): current_time_seconds = time.time() rate = 1 From 2076979a3b445c4e8b54cd8ec719d165171590fa Mon Sep 17 00:00:00 2001 From: Wael Karkoub Date: Mon, 15 Apr 2024 19:43:42 +0100 Subject: [PATCH 06/12] modifies config list --- test/oai/test_client.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/test/oai/test_client.py b/test/oai/test_client.py index 064af3a6a983..987e4bc0e015 100755 --- a/test/oai/test_client.py +++ b/test/oai/test_client.py @@ -301,12 +301,11 @@ def test_cache(): @pytest.mark.skipif(skip_openai, reason=reason) def test_throttled_api_calls(): - # config_list = config_list_from_json( - # env_or_file=OAI_CONFIG_LIST, - # file_location=KEY_LOC, - # filter_dict={"model": ["gpt-3.5-turbo"]}, - # ) - config_list = [{"model": "gpt-3.5-turbo", "api_key": os.environ.get("OPENAI_API_KEY")}] + config_list = config_list_from_json( + env_or_file=OAI_CONFIG_LIST, + file_location=KEY_LOC, + filter_dict={"model": ["gpt-3.5-turbo"]}, + ) # Api calling limited at 0.2 request per second, or 1 request per 5 seconds rate = 1 / 5.0 From 
2508235b22d2a80e4951e4c4c941f25a5939a43f Mon Sep 17 00:00:00 2001 From: Wael Karkoub Date: Mon, 15 Apr 2024 19:49:34 +0100 Subject: [PATCH 07/12] updates faq --- website/docs/FAQ.mdx | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/website/docs/FAQ.mdx b/website/docs/FAQ.mdx index ee5331f4594b..a7d7ab025e3e 100644 --- a/website/docs/FAQ.mdx +++ b/website/docs/FAQ.mdx @@ -37,7 +37,15 @@ Yes. You currently have two options: - Autogen can work with any API endpoint which complies with OpenAI-compatible RESTful APIs - e.g. serving local LLM via FastChat or LM Studio. Please check https://microsoft.github.io/autogen/blog/2023/07/14/Local-LLMs for an example. - You can supply your own custom model implementation and use it with Autogen. Please check https://microsoft.github.io/autogen/blog/2024/01/26/Custom-Models for more information. -## Handle Rate Limit Error and Timeout Error +## Handling API Rate Limits + +### Setting the API Rate Limit + +You can set the `api_rate_limit` in a `config_list` for an agent, which will be used to control the rate at which API requests are sent. + +- `api_rate_limit` (float): the maximum number of API requests allowed per second. + +### Handle Rate Limit Error and Timeout Error You can set `max_retries` to handle rate limit error. And you can set `timeout` to handle timeout error. They can all be specified in `llm_config` for an agent, which will be used in the OpenAI client for LLM inference. They can be set differently for different clients if they are set in the `config_list`. From af651ba842340ad307dc857891490b6b9ac4cdcd Mon Sep 17 00:00:00 2001 From: Wael Karkoub Date: Mon, 22 Apr 2024 14:32:51 +0100 Subject: [PATCH 08/12] updates llm config doc --- website/docs/topics/llm_configuration.ipynb | 685 ++++++++++---------- 1 file changed, 344 insertions(+), 341 deletions(-) diff --git a/website/docs/topics/llm_configuration.ipynb b/website/docs/topics/llm_configuration.ipynb index 518092ecfbac..68d11e3f40d8 100644 --- a/website/docs/topics/llm_configuration.ipynb +++ b/website/docs/topics/llm_configuration.ipynb @@ -1,342 +1,345 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# LLM Configuration\n", - "\n", - "In AutoGen, agents use LLMs as key components to understand and react. To configure an agent's access to LLMs, you can specify an `llm_config` argument in its constructor. For example, the following snippet shows a configuration that uses `gpt-4`:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "llm_config = {\n", - " \"config_list\": [{\"model\": \"gpt-4\", \"api_key\": os.environ[\"OPENAI_API_KEY\"]}],\n", - "}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "````{=mdx}\n", - ":::warning\n", - "It is important to never commit secrets into your code, therefore we read the OpenAI API key from an environment variable.\n", - ":::\n", - "````\n", - "\n", - "This `llm_config` can then be passed to an agent's constructor to enable it to use the LLM." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import autogen\n", - "\n", - "assistant = autogen.AssistantAgent(name=\"assistant\", llm_config=llm_config)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## Introduction to `config_list`\n", - "\n", - "Different tasks may require different models, and the `config_list` allows specifying the different endpoints and configurations that are to be used. It is a list of dictionaries, each of which contains the following keys depending on the kind of endpoint being used:\n", - "\n", - "````{=mdx}\n", - "import Tabs from '@theme/Tabs';\n", - "import TabItem from '@theme/TabItem';\n", - "\n", - "\n", - " \n", - " - `model` (str, required): The identifier of the model to be used, such as 'gpt-4', 'gpt-3.5-turbo'.\n", - " - `api_key` (str, optional): The API key required for authenticating requests to the model's API endpoint.\n", - " - `base_url` (str, optional): The base URL of the API endpoint. This is the root address where API calls are directed.\n", - " - `tags` (List[str], optional): Tags which can be used for filtering.\n", - "\n", - " Example:\n", - " ```json\n", - " [\n", - " {\n", - " \"model\": \"gpt-4\",\n", - " \"api_key\": os.environ['OPENAI_API_KEY']\n", - " }\n", - " ]\n", - " ```\n", - " \n", - " \n", - " - `model` (str, required): The deployment to be used. The model corresponds to the deployment name on Azure OpenAI.\n", - " - `api_key` (str, optional): The API key required for authenticating requests to the model's API endpoint.\n", - " - `api_type`: `azure`\n", - " - `base_url` (str, optional): The base URL of the API endpoint. This is the root address where API calls are directed.\n", - " - `api_version` (str, optional): The version of the Azure API you wish to use.\n", - " - `tags` (List[str], optional): Tags which can be used for filtering.\n", - "\n", - " Example:\n", - " ```json\n", - " [\n", - " {\n", - " \"model\": \"my-gpt-4-deployment\",\n", - " \"api_type\": \"azure\",\n", - " \"api_key\": os.environ['AZURE_OPENAI_API_KEY'],\n", - " \"base_url\": \"https://ENDPOINT.openai.azure.com/\",\n", - " \"api_version\": \"2024-02-15-preview\"\n", - " }\n", - " ]\n", - " ```\n", - " \n", - " \n", - " - `model` (str, required): The identifier of the model to be used, such as 'llama-7B'.\n", - " - `api_key` (str, optional): The API key required for authenticating requests to the model's API endpoint.\n", - " - `base_url` (str, optional): The base URL of the API endpoint. This is the root address where API calls are directed.\n", - " - `tags` (List[str], optional): Tags which can be used for filtering.\n", - "\n", - " Example:\n", - " ```json\n", - " [\n", - " {\n", - " \"model\": \"llama-7B\",\n", - " \"base_url\": \"http://localhost:1234\"\n", - " }\n", - " ]\n", - " ```\n", - " \n", - "\n", - "````\n", - "\n", - "---\n", - "\n", - "````{=mdx}\n", - ":::tip\n", - "By default this will create a model client which assumes an OpenAI API (or compatible) endpoint. 
To use custom model clients, see [here](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_custom_model.ipynb).\n", - ":::\n", - "````\n", - "\n", - "### `OAI_CONFIG_LIST` pattern\n", - "\n", - "A common, useful pattern used is to define this `config_list` is via JSON (specified as a file or an environment variable set to a JSON-formatted string) and then use the [`config_list_from_json`](/docs/reference/oai/openai_utils#config_list_from_json) helper function to load it:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "config_list = autogen.config_list_from_json(\n", - " env_or_file=\"OAI_CONFIG_LIST\",\n", - ")\n", - "\n", - "# Then, create the assistant agent with the config list\n", - "assistant = autogen.AssistantAgent(name=\"assistant\", llm_config={\"config_list\": config_list})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This can be helpful as it keeps all the configuration in one place across different projects or notebooks.\n", - "\n", - "This function interprets the `env_or_file` argument as follows:\n", - "\n", - "- If `env_or_file` is an environment variable then:\n", - " - It will first try to load the file from the path specified in the environment variable.\n", - " - If there is no file, it will try to interpret the environment variable as a JSON string.\n", - "- Otherwise, it will try to open the file at the path specified by `env_or_file`.\n", - "\n", - "### Why is it a list?\n", - "\n", - "Being a list allows you to define multiple models that can be used by the agent. This is useful for a few reasons:\n", - "\n", - "- If one model times out or fails, the agent can try another model.\n", - "- Having a single global list of models and [filtering it](#config-list-filtering) based on certain keys (e.g. name, tag) in order to pass select models into a certain agent (e.g. use cheaper GPT 3.5 for agents solving easier tasks)\n", - "- While the core agents, (e.g. conversable or assistant) do not have special logic around selecting configs, some of the specialized agents *may* have logic to select the best model based on the task at hand.\n", - "\n", - "### How does an agent decide which model to pick out of the list?\n", - "\n", - "An agent uses the very first model available in the \"config_list\" and makes LLM calls against this model. If the model fails (e.g. API throttling) the agent will retry the request against the 2nd model and so on until prompt completion is received (or throws an error if none of the models successfully completes the request). In general there's no implicit/hidden logic inside agents that is used to pick \"the best model for the task\". However, some specialized agents may attempt to choose \"the best model for the task\". It is developers responsibility to pick the right models and use them with agents.\n", - "\n", - "### Config list filtering\n", - "\n", - "As described above the list can be filtered based on certain criteria. This is defined as a dictionary of key to filter on and value to filter by. 
For example, if you have a list of configs and you want to select the one with the model \"gpt-3.5-turbo\" you can use the following filter:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "filter_dict = {\"model\": \"gpt-3.5-turbo\"}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "This can then be applied to a config list loaded in Python with [`filter_config`](/docs/reference/oai/openai_utils#filter_config):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "config_list = autogen.filter_config(config_list, filter_dict)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Or, directly when loading the config list using [`config_list_from_json`](/docs/reference/oai/openai_utils#config_list_from_json):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "config_list = autogen.config_list_from_json(env_or_file=\"OAI_CONFIG_LIST\", filter_dict=filter_dict)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Tags\n", - "\n", - "Model names can differ between OpenAI and Azure OpenAI, so tags offer an easy way to smooth over this inconsistency. Tags are a list of strings in the `config_list`, for example for the following `config_list`:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "config_list = [\n", - " {\"model\": \"my-gpt-4-deployment\", \"api_key\": \"\", \"tags\": [\"gpt4\", \"openai\"]},\n", - " {\"model\": \"llama-7B\", \"base_url\": \"http://127.0.0.1:8080\", \"tags\": [\"llama\", \"local\"]},\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Then when filtering the `config_list` you can can specify the desired tags. A config is selected if it has at least one of the tags specified in the filter. For example, to just get the `llama` model, you can use the following filter:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "filter_dict = {\"tags\": [\"llama\", \"another_tag\"]}\n", - "config_list = autogen.filter_config(config_list, filter_dict)\n", - "assert len(config_list) == 1" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Other configuration parameters\n", - "\n", - "Besides the `config_list`, there are other parameters that can be used to configure the LLM. These are split between parameters specifically used by Autogen and those passed into the model client.\n", - "\n", - "### AutoGen specific parameters\n", - "\n", - "- `cache_seed` - This is a legacy parameter and not recommended to be used unless the reason for using it is to disable the default caching behavior. To disable default caching, set this to `None`. Otherwise, by default or if an int is passed the [DiskCache](/docs/reference/cache/disk_cache) will be used. For the new way of using caching, pass a [Cache](/docs/reference/cache/) object into [`initiate_chat`](/docs/reference/agentchat/conversable_agent#initiate_chat).\n", - "\n", - "### Extra model client parameters\n", - "\n", - "It is also possible to passthrough parameters through to the OpenAI client. 
Parameters that correspond to the [`OpenAI` client](https://github.com/openai/openai-python/blob/d231d1fa783967c1d3a1db3ba1b52647fff148ac/src/openai/_client.py#L67) or the [`OpenAI` completions create API](https://github.com/openai/openai-python/blob/d231d1fa783967c1d3a1db3ba1b52647fff148ac/src/openai/resources/completions.py#L35) can be supplied.\n", - "\n", - "This is commonly used for things like `temperature`, or `timeout`.\n", - "\n", - "## Example\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "llm_config = {\n", - " \"config_list\": [\n", - " {\n", - " \"model\": \"my-gpt-4-deployment\",\n", - " \"api_key\": os.environ.get(\"AZURE_OPENAI_API_KEY\"),\n", - " \"api_type\": \"azure\",\n", - " \"base_url\": os.environ.get(\"AZURE_OPENAI_API_BASE\"),\n", - " \"api_version\": \"2024-02-15-preview\",\n", - " },\n", - " {\n", - " \"model\": \"llama-7B\",\n", - " \"base_url\": \"http://127.0.0.1:8080\",\n", - " \"api_type\": \"openai\",\n", - " },\n", - " ],\n", - " \"temperature\": 0.9,\n", - " \"timeout\": 300,\n", - "}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Other helpers for loading a config list\n", - "\n", - "- [`get_config_list`](/docs/reference/oai/openai_utils#get_config_list): Generates configurations for API calls, primarily from provided API keys.\n", - "- [`config_list_openai_aoai`](/docs/reference/oai/openai_utils#config_list_openai_aoai): Constructs a list of configurations using both Azure OpenAI and OpenAI endpoints, sourcing API keys from environment variables or local files.\n", - "- [`config_list_from_models`](/docs/reference/oai/openai_utils#config_list_from_models): Creates configurations based on a provided list of models, useful when targeting specific models without manually specifying each configuration.\n", - "- [`config_list_from_dotenv`](/docs/reference/oai/openai_utils#config_list_from_dotenv): Constructs a configuration list from a `.env` file, offering a consolidated way to manage multiple API configurations and keys from a single file.\n", - "\n", - "See [this notebook](https://github.com/microsoft/autogen/blob/main/notebook/config_loader_utility_functions.ipynb) for examples of using the above functions." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "masterclass", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.7" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 - } + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# LLM Configuration\n", + "\n", + "In AutoGen, agents use LLMs as key components to understand and react. To configure an agent's access to LLMs, you can specify an `llm_config` argument in its constructor. 
For example, the following snippet shows a configuration that uses `gpt-4`:" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "llm_config = {\n", " \"config_list\": [{\"model\": \"gpt-4\", \"api_key\": os.environ[\"OPENAI_API_KEY\"]}],\n", "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "````{=mdx}\n", ":::warning\n", "It is important to never commit secrets into your code, therefore we read the OpenAI API key from an environment variable.\n", ":::\n", "````\n", "\n", "This `llm_config` can then be passed to an agent's constructor to enable it to use the LLM." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import autogen\n", "\n", "assistant = autogen.AssistantAgent(name=\"assistant\", llm_config=llm_config)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "## Introduction to `config_list`\n", "\n", "Different tasks may require different models, and the `config_list` allows specifying the different endpoints and configurations that are to be used. It is a list of dictionaries, each of which contains the following keys depending on the kind of endpoint being used:\n", "\n", "````{=mdx}\n", "import Tabs from '@theme/Tabs';\n", "import TabItem from '@theme/TabItem';\n", "\n", "\n", " \n", " - `model` (str, required): The identifier of the model to be used, such as 'gpt-4', 'gpt-3.5-turbo'.\n", " - `api_key` (str, optional): The API key required for authenticating requests to the model's API endpoint.\n", " - `api_rate_limit` (float, optional): Specifies the maximum number of API requests permitted per second.\n", " - `base_url` (str, optional): The base URL of the API endpoint. This is the root address where API calls are directed.\n", " - `tags` (List[str], optional): Tags which can be used for filtering.\n", "\n", " Example:\n", " ```json\n", " [\n", " {\n", " \"model\": \"gpt-4\",\n", " \"api_key\": os.environ['OPENAI_API_KEY'],\n", " \"api_rate_limit\": 60.0 // Set to allow up to 60 API requests per second.\n", " }\n", " ]\n", " ```\n", " \n", " \n", " - `model` (str, required): The deployment to be used. The model corresponds to the deployment name on Azure OpenAI.\n", " - `api_key` (str, optional): The API key required for authenticating requests to the model's API endpoint.\n", " - `api_type`: `azure`.\n", " - `api_rate_limit` (float, optional): Specifies the maximum number of API requests permitted per second.\n", " - `base_url` (str, optional): The base URL of the API endpoint. 
This is the root address where API calls are directed.\n", " - `api_version` (str, optional): The version of the Azure API you wish to use.\n", " - `tags` (List[str], optional): Tags which can be used for filtering.\n", "\n", " Example:\n", " ```json\n", " [\n", " {\n", " \"model\": \"my-gpt-4-deployment\",\n", " \"api_type\": \"azure\",\n", " \"api_key\": os.environ['AZURE_OPENAI_API_KEY'],\n", " \"base_url\": \"https://ENDPOINT.openai.azure.com/\",\n", " \"api_version\": \"2024-02-15-preview\"\n", " }\n", " ]\n", " ```\n", " \n", " \n", " - `model` (str, required): The identifier of the model to be used, such as 'llama-7B'.\n", " - `api_key` (str, optional): The API key required for authenticating requests to the model's API endpoint.\n", " - `api_rate_limit` (float, optional): Specifies the maximum number of API requests permitted per second.\n", " - `base_url` (str, optional): The base URL of the API endpoint. This is the root address where API calls are directed.\n", " - `tags` (List[str], optional): Tags which can be used for filtering.\n", "\n", " Example:\n", " ```json\n", " [\n", " {\n", " \"model\": \"llama-7B\",\n", " \"base_url\": \"http://localhost:1234\"\n", " }\n", " ]\n", " ```\n", " \n", "\n", "````\n", "\n", "---\n", "\n", "````{=mdx}\n", ":::tip\n", "By default this will create a model client which assumes an OpenAI API (or compatible) endpoint. To use custom model clients, see [here](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_custom_model.ipynb).\n", ":::\n", "````\n", "\n", "### `OAI_CONFIG_LIST` pattern\n", "\n", "A common, useful pattern is to define this `config_list` via JSON (specified as a file or an environment variable set to a JSON-formatted string) and then use the [`config_list_from_json`](/docs/reference/oai/openai_utils#config_list_from_json) helper function to load it:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "config_list = autogen.config_list_from_json(\n", " env_or_file=\"OAI_CONFIG_LIST\",\n", ")\n", "\n", "# Then, create the assistant agent with the config list\n", "assistant = autogen.AssistantAgent(name=\"assistant\", llm_config={\"config_list\": config_list})" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This can be helpful as it keeps all the configuration in one place across different projects or notebooks.\n", "\n", "This function interprets the `env_or_file` argument as follows:\n", "\n", "- If `env_or_file` is an environment variable then:\n", " - It will first try to load the file from the path specified in the environment variable.\n", " - If there is no file, it will try to interpret the environment variable as a JSON string.\n", "- Otherwise, it will try to open the file at the path specified by `env_or_file`.\n", "\n", "### Why is it a list?\n", "\n", "Being a list allows you to define multiple models that can be used by the agent. This is useful for a few reasons:\n", "\n", "- If one model times out or fails, the agent can try another model.\n", "- Having a single global list of models and [filtering it](#config-list-filtering) based on certain keys (e.g. name, tag) in order to pass select models into a certain agent (e.g. use cheaper GPT 3.5 for agents solving easier tasks)\n", "- While the core agents (e.g. 
conversable or assistant) do not have special logic around selecting configs, some of the specialized agents *may* have logic to select the best model based on the task at hand.\n", "\n", "### How does an agent decide which model to pick out of the list?\n", "\n", "An agent uses the very first model available in the \"config_list\" and makes LLM calls against this model. If the model fails (e.g. API throttling) the agent will retry the request against the 2nd model and so on until prompt completion is received (or throws an error if none of the models successfully completes the request). In general there's no implicit/hidden logic inside agents that is used to pick \"the best model for the task\". However, some specialized agents may attempt to choose \"the best model for the task\". It is the developers' responsibility to pick the right models and use them with agents.\n", "\n", "### Config list filtering\n", "\n", "As described above the list can be filtered based on certain criteria. This is defined as a dictionary of key to filter on and value to filter by. For example, if you have a list of configs and you want to select the one with the model \"gpt-3.5-turbo\" you can use the following filter:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "filter_dict = {\"model\": \"gpt-3.5-turbo\"}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "This can then be applied to a config list loaded in Python with [`filter_config`](/docs/reference/oai/openai_utils#filter_config):" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "config_list = autogen.filter_config(config_list, filter_dict)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Or, directly when loading the config list using [`config_list_from_json`](/docs/reference/oai/openai_utils#config_list_from_json):" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "config_list = autogen.config_list_from_json(env_or_file=\"OAI_CONFIG_LIST\", filter_dict=filter_dict)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Tags\n", "\n", "Model names can differ between OpenAI and Azure OpenAI, so tags offer an easy way to smooth over this inconsistency. Tags are a list of strings in the `config_list`, for example for the following `config_list`:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "config_list = [\n", " {\"model\": \"my-gpt-4-deployment\", \"api_key\": \"\", \"tags\": [\"gpt4\", \"openai\"]},\n", " {\"model\": \"llama-7B\", \"base_url\": \"http://127.0.0.1:8080\", \"tags\": [\"llama\", \"local\"]},\n", "]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Then when filtering the `config_list` you can specify the desired tags. A config is selected if it has at least one of the tags specified in the filter. 
For example, to just get the `llama` model, you can use the following filter:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "filter_dict = {\"tags\": [\"llama\", \"another_tag\"]}\n", + "config_list = autogen.filter_config(config_list, filter_dict)\n", + "assert len(config_list) == 1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Other configuration parameters\n", + "\n", + "Besides the `config_list`, there are other parameters that can be used to configure the LLM. These are split between parameters specifically used by Autogen and those passed into the model client.\n", + "\n", + "### AutoGen specific parameters\n", + "\n", + "- `cache_seed` - This is a legacy parameter and not recommended to be used unless the reason for using it is to disable the default caching behavior. To disable default caching, set this to `None`. Otherwise, by default or if an int is passed the [DiskCache](/docs/reference/cache/disk_cache) will be used. For the new way of using caching, pass a [Cache](/docs/reference/cache/) object into [`initiate_chat`](/docs/reference/agentchat/conversable_agent#initiate_chat).\n", + "\n", + "### Extra model client parameters\n", + "\n", + "It is also possible to passthrough parameters through to the OpenAI client. Parameters that correspond to the [`OpenAI` client](https://github.com/openai/openai-python/blob/d231d1fa783967c1d3a1db3ba1b52647fff148ac/src/openai/_client.py#L67) or the [`OpenAI` completions create API](https://github.com/openai/openai-python/blob/d231d1fa783967c1d3a1db3ba1b52647fff148ac/src/openai/resources/completions.py#L35) can be supplied.\n", + "\n", + "This is commonly used for things like `temperature`, or `timeout`.\n", + "\n", + "## Example\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm_config = {\n", + " \"config_list\": [\n", + " {\n", + " \"model\": \"my-gpt-4-deployment\",\n", + " \"api_key\": os.environ.get(\"AZURE_OPENAI_API_KEY\"),\n", + " \"api_type\": \"azure\",\n", + " \"base_url\": os.environ.get(\"AZURE_OPENAI_API_BASE\"),\n", + " \"api_version\": \"2024-02-15-preview\",\n", + " },\n", + " {\n", + " \"model\": \"llama-7B\",\n", + " \"base_url\": \"http://127.0.0.1:8080\",\n", + " \"api_type\": \"openai\",\n", + " },\n", + " ],\n", + " \"temperature\": 0.9,\n", + " \"timeout\": 300,\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Other helpers for loading a config list\n", + "\n", + "- [`get_config_list`](/docs/reference/oai/openai_utils#get_config_list): Generates configurations for API calls, primarily from provided API keys.\n", + "- [`config_list_openai_aoai`](/docs/reference/oai/openai_utils#config_list_openai_aoai): Constructs a list of configurations using both Azure OpenAI and OpenAI endpoints, sourcing API keys from environment variables or local files.\n", + "- [`config_list_from_models`](/docs/reference/oai/openai_utils#config_list_from_models): Creates configurations based on a provided list of models, useful when targeting specific models without manually specifying each configuration.\n", + "- [`config_list_from_dotenv`](/docs/reference/oai/openai_utils#config_list_from_dotenv): Constructs a configuration list from a `.env` file, offering a consolidated way to manage multiple API configurations and keys from a single file.\n", + "\n", + "See [this 
notebook](https://github.com/microsoft/autogen/blob/main/notebook/config_loader_utility_functions.ipynb) for examples of using the above functions." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From e1408be1a0350ef5368c90b421f304cd3c508384 Mon Sep 17 00:00:00 2001 From: Wael Karkoub Date: Mon, 22 Apr 2024 14:53:06 +0100 Subject: [PATCH 09/12] mock openai calls --- test/oai/test_client.py | 56 +++++++++++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/test/oai/test_client.py b/test/oai/test_client.py index 987e4bc0e015..8ca139abb24c 100755 --- a/test/oai/test_client.py +++ b/test/oai/test_client.py @@ -4,6 +4,8 @@ import shutil import sys import time +from types import SimpleNamespace +from unittest.mock import patch import pytest @@ -31,6 +33,40 @@ OAI_CONFIG_LIST = "OAI_CONFIG_LIST" +class _MockClient: + def __init__(self, config, **kwargs): + pass + + def create(self, params): + # can create my own data response class + # here using SimpleNamespace for simplicity + # as long as it adheres to the ModelClientResponseProtocol + + response = SimpleNamespace() + response.choices = [] + response.model = "mock_model" + + text = "this is a dummy text response" + choice = SimpleNamespace() + choice.message = SimpleNamespace() + choice.message.content = text + choice.message.function_call = None + response.choices.append(choice) + return response + + def message_retrieval(self, response): + choices = response.choices + return [choice.message.content for choice in choices] + + def cost(self, response) -> float: + response.cost = 0 + return 0 + + @staticmethod + def get_usage(response): + return {} + + @pytest.mark.skipif(skip, reason="openai>=1 not installed") def test_aoai_chat_completion(): config_list = config_list_from_json( @@ -299,22 +335,22 @@ def test_cache(): assert not os.path.exists(os.path.join(cache_dir, str(LEGACY_DEFAULT_CACHE_SEED))) -@pytest.mark.skipif(skip_openai, reason=reason) def test_throttled_api_calls(): - config_list = config_list_from_json( - env_or_file=OAI_CONFIG_LIST, - file_location=KEY_LOC, - filter_dict={"model": ["gpt-3.5-turbo"]}, - ) - # Api calling limited at 0.2 request per second, or 1 request per 5 seconds rate = 1 / 5.0 - # Adding a timeout to catch false positives - config_list[0]["timeout"] = 1 / rate - config_list[0]["api_rate_limit"] = rate + config_list = [ + { + "model": "mock_model", + "model_client_cls": "_MockClient", + # Adding a timeout to catch false positives + "timeout": 1 / rate, + "api_rate_limit": rate, + } + ] client = OpenAIWrapper(config_list=config_list, cache_seed=None) + client.register_model_client(_MockClient) n_loops = 2 current_time = time.perf_counter() From 7af32c53b77d979991d1d4e1a83831407f0322ba Mon Sep 17 00:00:00 2001 From: Wael Karkoub Date: Mon, 22 Apr 2024 14:58:58 +0100 Subject: [PATCH 10/12] make windows tests happy --- test/oai/test_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/oai/test_client.py b/test/oai/test_client.py index 8ca139abb24c..b1c0181bd420 100755 --- a/test/oai/test_client.py +++ b/test/oai/test_client.py @@ -353,12 +353,12 @@ def 
test_throttled_api_calls(): client.register_model_client(_MockClient) n_loops = 2 - current_time = time.perf_counter() + current_time = time.time() for _ in range(n_loops): client.create(messages=[{"role": "user", "content": "hello"}]) min_expected_time = (n_loops - 1) / rate - assert time.perf_counter() - current_time > min_expected_time + assert time.time() - current_time > min_expected_time if __name__ == "__main__": From 896f6ea75916994caf9ee0510d171a7b99a1295e Mon Sep 17 00:00:00 2001 From: Wael Karkoub Date: Tue, 4 Jun 2024 16:03:01 +0100 Subject: [PATCH 11/12] clean up --- autogen/oai/client.py | 24 ++++++++++++++---------- autogen/oai/rate_limiters.py | 7 ++++++- test/oai/test_rate_limiters.py | 2 +- 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/autogen/oai/client.py b/autogen/oai/client.py index 451e26ff3a5d..68e8bf69aa99 100644 --- a/autogen/oai/client.py +++ b/autogen/oai/client.py @@ -16,7 +16,7 @@ from autogen.runtime_logging import log_chat_completion, log_new_client, log_new_wrapper, logging_enabled from autogen.token_count_utils import count_token -from .rate_limiters import TimeRateLimiter +from .rate_limiters import RateLimiter, TimeRateLimiter TOOL_ENABLED = False try: @@ -380,18 +380,13 @@ def __init__(self, *, config_list: Optional[List[Dict[str, Any]]] = None, **base self._clients: List[ModelClient] = [] self._config_list: List[Dict[str, Any]] = [] - self._rate_limiters: List[Optional[TimeRateLimiter]] = [] + self._rate_limiters: List[Optional[RateLimiter]] = [] if config_list: + self._initialize_rate_limiters(config_list) + config_list = [config.copy() for config in config_list] # make a copy before modifying for config in config_list: - # Instantiate the rate limiter - if "api_rate_limit" in config: - self._rate_limiters.append(TimeRateLimiter(config["api_rate_limit"])) - del config["api_rate_limit"] - else: - self._rate_limiters.append(None) - self._register_default_client(config, openai_config) # could modify the config self._config_list.append( {**extra_kwargs, **{k: v for k, v in config.items() if k not in self.openai_kwargs}} @@ -932,4 +927,13 @@ def _throttle_api_calls(self, idx: int) -> None: limiter = self._rate_limiters[idx] assert limiter is not None - limiter.wait() + limiter.sleep() + + def _initialize_rate_limiters(self, config_list: List[Dict[str, Any]]) -> None: + for config in config_list: + # Instantiate the rate limiter + if "api_rate_limit" in config: + self._rate_limiters.append(TimeRateLimiter(config["api_rate_limit"])) + del config["api_rate_limit"] + else: + self._rate_limiters.append(None) diff --git a/autogen/oai/rate_limiters.py b/autogen/oai/rate_limiters.py index 4ef0bd46d672..4b84a7f99400 100644 --- a/autogen/oai/rate_limiters.py +++ b/autogen/oai/rate_limiters.py @@ -1,4 +1,9 @@ import time +from typing import Protocol + + +class RateLimiter(Protocol): + def sleep(self, *args, **kwargs): ... class TimeRateLimiter: @@ -16,7 +21,7 @@ def __init__(self, rate: float): self._time_interval_seconds = 1.0 / rate self._last_time_called = 0.0 - def wait(self): + def sleep(self, *args, **kwargs): """Synchronously waits until enough time has passed to allow the next operation. 
If the elapsed time since the last operation is less than the required time interval, diff --git a/test/oai/test_rate_limiters.py b/test/oai/test_rate_limiters.py index cf40270e3c4c..a04429c0dea2 100644 --- a/test/oai/test_rate_limiters.py +++ b/test/oai/test_rate_limiters.py @@ -14,7 +14,7 @@ def test_time_rate_limiter(execute_n_times): n_loops = 2 for _ in range(n_loops): - rate_limiter.wait() + rate_limiter.sleep() total_time = time.time() - current_time_seconds min_expected_time = (n_loops - 1) / rate From fd8a44c14ad4636f4d6eef74e3a568e5281ea0f3 Mon Sep 17 00:00:00 2001 From: Wael Karkoub Date: Tue, 4 Jun 2024 16:13:44 +0100 Subject: [PATCH 12/12] undo change in doc --- website/docs/topics/llm_configuration.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/docs/topics/llm_configuration.ipynb b/website/docs/topics/llm_configuration.ipynb index 2f51c8dc55d6..4edcc65cd51f 100644 --- a/website/docs/topics/llm_configuration.ipynb +++ b/website/docs/topics/llm_configuration.ipynb @@ -81,7 +81,7 @@ " \n", " - `model` (str, required): The deployment to be used. The model corresponds to the deployment name on Azure OpenAI.\n", " - `api_key` (str, optional): The API key required for authenticating requests to the model's API endpoint.\n", - " - `api_type`: `azure`.\n", + " - `api_type`: `azure`\n", " - `api_rate_limit` (float, optional): Specifies the maximum number of API requests permitted per second.\n", " - `base_url` (str, optional): The base URL of the API endpoint. This is the root address where API calls are directed.\n", " - `api_version` (str, optional): The version of the Azure API you wish to use.\n", @@ -174,7 +174,7 @@ "\n", "### Config list filtering\n", "\n", - "As described above the list can be filtered based on certain criteria. This is defined as a dictionary of key to filter on and value to filter by. For example, if you have a list of configs and you want to select the one with the model \"gpt-3.5-turbo\" you can use the following filter:" + "As described above the list can be filtered based on certain criteria. This is defined as a dictionary of key to filter on and values to filter by. For example, if you have a list of configs and you want to select the one with the model \"gpt-3.5-turbo\" you can use the following filter:" ] }, {
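For reference, a minimal usage sketch of the `api_rate_limit` option introduced in this series; the model name and environment variable below are illustrative, and it assumes a build of autogen that includes these commits:

```python
import os

from autogen import OpenAIWrapper

# One endpoint config; `api_rate_limit` caps outgoing requests at
# 0.2 per second, i.e. at most one request every five seconds.
config_list = [
    {
        "model": "gpt-3.5-turbo",
        "api_key": os.environ["OPENAI_API_KEY"],
        "api_rate_limit": 0.2,
    }
]

# Disable caching so each create() call actually reaches the API
# and is therefore throttled by the rate limiter.
client = OpenAIWrapper(config_list=config_list, cache_seed=None)

response = client.create(messages=[{"role": "user", "content": "hello"}])
print(client.extract_text_or_completion_object(response))
```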