From a23ced6ce53560c5eeedc44f43c74fd696d474f4 Mon Sep 17 00:00:00 2001 From: Vijay Ramesh Date: Fri, 12 Jan 2024 09:54:41 -0800 Subject: [PATCH 01/14] implement redis cache mode, if redis_url is set in the llm_config then it will try to use this. also adds a test to validate both the existing and the redis cache behavior. --- .github/workflows/contrib-openai.yml | 45 +++++++++++ autogen/cache/__init__.py | 0 autogen/cache/abstract_cache_base.py | 89 ++++++++++++++++++++++ autogen/cache/cache_factory.py | 38 ++++++++++ autogen/cache/disk_cache.py | 88 +++++++++++++++++++++ autogen/cache/redis_cache.py | 109 +++++++++++++++++++++++++++ autogen/oai/client.py | 16 ++-- setup.py | 1 + test/agentchat/test_cache.py | 109 +++++++++++++++++++++++++++ test/conftest.py | 2 + 10 files changed, 492 insertions(+), 5 deletions(-) create mode 100644 autogen/cache/__init__.py create mode 100644 autogen/cache/abstract_cache_base.py create mode 100644 autogen/cache/cache_factory.py create mode 100644 autogen/cache/disk_cache.py create mode 100644 autogen/cache/redis_cache.py create mode 100644 test/agentchat/test_cache.py diff --git a/.github/workflows/contrib-openai.yml b/.github/workflows/contrib-openai.yml index 90eac3488ed3..6a5f7230901e 100644 --- a/.github/workflows/contrib-openai.yml +++ b/.github/workflows/contrib-openai.yml @@ -217,3 +217,48 @@ jobs: with: file: ./coverage.xml flags: unittests + CacheTest: + strategy: + matrix: + os: [ ubuntu-latest ] + python-version: [ "3.11" ] + runs-on: ${{ matrix.os }} + environment: openai1 + services: + redis: + image: redis + ports: + - 6379:6379 + options: --entrypoint redis-server + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install packages and dependencies + run: | + docker --version + python -m pip install --upgrade pip wheel + pip install -e . + pip install docker + python -c "import autogen" + pip install coverage pytest-asyncio + - name: Install packages and dependencies for redis + run: | + pip install -e .[redis] + - name: Coverage + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} + AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }} + OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }} + run: | + pip install coverage>=5.3 + coverage run -a -m pytest test/agentchat/test_cache.py + coverage xml + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v3 + with: + file: ./coverage.xml + flags: unittests \ No newline at end of file diff --git a/autogen/cache/__init__.py b/autogen/cache/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/autogen/cache/abstract_cache_base.py b/autogen/cache/abstract_cache_base.py new file mode 100644 index 000000000000..81c12a836a91 --- /dev/null +++ b/autogen/cache/abstract_cache_base.py @@ -0,0 +1,89 @@ +from abc import ABC, abstractmethod + +class AbstractCache(ABC): + """ + Abstract base class for cache implementations. + + This class defines the basic interface for cache operations. + Implementing classes should provide concrete implementations for + these methods to handle caching mechanisms. + """ + + @abstractmethod + def get(self, key, default=None): + """ + Retrieve an item from the cache. + + Abstract method that must be implemented by subclasses to + retrieve an item from the cache. + + Args: + key (str): The key identifying the item in the cache. 
+ default (optional): The default value to return if the key is not found. + Defaults to None. + + Returns: + The value associated with the key if found, else the default value. + + Raises: + NotImplementedError: If the subclass does not implement this method. + """ + + @abstractmethod + def set(self, key, value): + """ + Set an item in the cache. + + Abstract method that must be implemented by subclasses to + store an item in the cache. + + Args: + key (str): The key under which the item is to be stored. + value: The value to be stored in the cache. + + Raises: + NotImplementedError: If the subclass does not implement this method. + """ + + @abstractmethod + def close(self): + """ + Close the cache. + + Abstract method that should be implemented by subclasses to + perform any necessary cleanup, such as closing network connections or + releasing resources. + + Raises: + NotImplementedError: If the subclass does not implement this method. + """ + + @abstractmethod + def __enter__(self): + """ + Enter the runtime context related to this object. + + The with statement will bind this method’s return value to the target(s) + specified in the as clause of the statement, if any. + + Raises: + NotImplementedError: If the subclass does not implement this method. + """ + + @abstractmethod + def __exit__(self, exc_type, exc_value, traceback): + """ + Exit the runtime context and close the cache. + + Abstract method that should be implemented by subclasses to handle + the exit from a with statement. It is responsible for resource + release and cleanup. + + Args: + exc_type: The exception type if an exception was raised in the context. + exc_value: The exception value if an exception was raised in the context. + traceback: The traceback if an exception was raised in the context. + + Raises: + NotImplementedError: If the subclass does not implement this method. + """ diff --git a/autogen/cache/cache_factory.py b/autogen/cache/cache_factory.py new file mode 100644 index 000000000000..84f0b1eea2e9 --- /dev/null +++ b/autogen/cache/cache_factory.py @@ -0,0 +1,38 @@ +from autogen.cache.disk_cache import DiskCache + +try: + from autogen.cache.redis_cache import RedisCache +except ImportError: + RedisCache = None + +def cache_factory(seed, redis_url): + """ + Factory function for creating cache instances. + + Based on the provided redis_url, this function decides whether to create a RedisCache + or DiskCache instance. If RedisCache is available and redis_url is provided, + a RedisCache instance is created. Otherwise, a DiskCache instance is used. + + Args: + seed (str): A string used as a seed or namespace for the cache. + This could be useful for creating distinct cache instances + or for namespacing keys in the cache. + redis_url (str or None): The URL for the Redis server. If this is None + or if RedisCache is not available, a DiskCache instance is created. + + Returns: + An instance of either RedisCache or DiskCache, depending on the availability of RedisCache + and the provided redis_url. 
+ + Examples: + Creating a Redis cache + > redis_cache = cache_factory("myseed", "redis://localhost:6379/0") + + Creating a Disk cache + > disk_cache = cache_factory("myseed", None) + """ + if RedisCache is not None and redis_url is not None: + return RedisCache(seed, redis_url) + else: + cache_path_root: str = ".cache" + return DiskCache(f"./{cache_path_root}/{seed}") diff --git a/autogen/cache/disk_cache.py b/autogen/cache/disk_cache.py new file mode 100644 index 000000000000..52ebd5b5067a --- /dev/null +++ b/autogen/cache/disk_cache.py @@ -0,0 +1,88 @@ +import diskcache +from .abstract_cache_base import AbstractCache + + +class DiskCache(AbstractCache): + """ + Implementation of AbstractCache using the DiskCache library. + + This class provides a concrete implementation of the AbstractCache + interface using the diskcache library for caching data on disk. + + Attributes: + cache (diskcache.Cache): The DiskCache instance used for caching. + + Methods: + __init__(self, seed): Initializes the DiskCache with the given seed. + get(self, key, default=None): Retrieves an item from the cache. + set(self, key, value): Sets an item in the cache. + close(self): Closes the cache. + __enter__(self): Context management entry. + __exit__(self, exc_type, exc_value, traceback): Context management exit. + """ + + def __init__(self, seed): + """ + Initialize the DiskCache instance. + + Args: + seed (str): A seed or namespace for the cache. This is used to create + a unique storage location for the cache data. + + """ + self.cache = diskcache.Cache(seed) + + def get(self, key, default=None): + """ + Retrieve an item from the cache. + + Args: + key (str): The key identifying the item in the cache. + default (optional): The default value to return if the key is not found. + Defaults to None. + + Returns: + The value associated with the key if found, else the default value. + """ + return self.cache.get(key, default) + + def set(self, key, value): + """ + Set an item in the cache. + + Args: + key (str): The key under which the item is to be stored. + value: The value to be stored in the cache. + """ + self.cache.set(key, value) + + def close(self): + """ + Close the cache. + + Perform any necessary cleanup, such as closing file handles or + releasing resources. + """ + self.cache.close() + + def __enter__(self): + """ + Enter the runtime context related to the object. + + Returns: + self: The instance itself. + """ + return self + + def __exit__(self, exc_type, exc_value, traceback): + """ + Exit the runtime context related to the object. + + Perform cleanup actions such as closing the cache. + + Args: + exc_type: The exception type if an exception was raised in the context. + exc_value: The exception value if an exception was raised in the context. + traceback: The traceback if an exception was raised in the context. + """ + self.close() diff --git a/autogen/cache/redis_cache.py b/autogen/cache/redis_cache.py new file mode 100644 index 000000000000..9a730516cd29 --- /dev/null +++ b/autogen/cache/redis_cache.py @@ -0,0 +1,109 @@ +import pickle +import redis +from .abstract_cache_base import AbstractCache + +class RedisCache(AbstractCache): + """ + Implementation of AbstractCache using the Redis database. + + This class provides a concrete implementation of the AbstractCache + interface using the Redis database for caching data. + + Attributes: + seed (str): A seed or namespace used as a prefix for cache keys. + cache (redis.Redis): The Redis client used for caching. 
+ + Methods: + __init__(self, seed, redis_url): Initializes the RedisCache with the given seed and Redis URL. + _prefixed_key(self, key): Internal method to get a namespaced cache key. + get(self, key, default=None): Retrieves an item from the cache. + set(self, key, value): Sets an item in the cache. + close(self): Closes the Redis client. + __enter__(self): Context management entry. + __exit__(self, exc_type, exc_value, traceback): Context management exit. + """ + + def __init__(self, seed, redis_url): + """ + Initialize the RedisCache instance. + + Args: + seed (str): A seed or namespace for the cache. This is used as a prefix for all cache keys. + redis_url (str): The URL for the Redis server. + + """ + self.seed = seed + self.cache = redis.Redis.from_url(redis_url) + + def _prefixed_key(self, key): + """ + Get a namespaced key for the cache. + + Args: + key (str): The original key. + + Returns: + str: The namespaced key. + """ + return f"autogen:{self.seed}:{key}" + + def get(self, key, default=None): + """ + Retrieve an item from the Redis cache. + + Args: + key (str): The key identifying the item in the cache. + default (optional): The default value to return if the key is not found. + Defaults to None. + + Returns: + The deserialized value associated with the key if found, else the default value. + """ + result = self.cache.get(self._prefixed_key(key)) + if result is None: + return default + return pickle.loads(result) + + def set(self, key, value): + """ + Set an item in the Redis cache. + + Args: + key (str): The key under which the item is to be stored. + value: The value to be stored in the cache. + + Notes: + The value is serialized using pickle before being stored in Redis. + """ + serialized_value = pickle.dumps(value, protocol=0) + self.cache.set(self._prefixed_key(key), serialized_value) + + def close(self): + """ + Close the Redis client. + + Perform any necessary cleanup, such as closing network connections. + """ + self.cache.close() + + def __enter__(self): + """ + Enter the runtime context related to the object. + + Returns: + self: The instance itself. + """ + return self + + def __exit__(self, exc_type, exc_value, traceback): + """ + Exit the runtime context related to the object. + + Perform cleanup actions such as closing the Redis client. + + Args: + exc_type: The exception type if an exception was raised in the context. + exc_value: The exception value if an exception was raised in the context. + traceback: The traceback if an exception was raised in the context. 
+ """ + self.close() diff --git a/autogen/oai/client.py b/autogen/oai/client.py index 65ad14254091..45d551ad8a2c 100644 --- a/autogen/oai/client.py +++ b/autogen/oai/client.py @@ -8,7 +8,6 @@ from flaml.automl.logger import logger_formatter from pydantic import BaseModel - from autogen.oai import completion from autogen.oai.openai_utils import DEFAULT_AZURE_API_VERSION, get_key, OAI_PRICE1K @@ -35,7 +34,9 @@ ) from openai.types.completion import Completion from openai.types.completion_usage import CompletionUsage - import diskcache + + # cache wrapper + from autogen.cache.cache_factory import cache_factory if openai.__version__ >= "1.1.0": TOOL_ENABLED = True @@ -52,7 +53,6 @@ class OpenAIWrapper: """A wrapper class for openai client.""" - cache_path_root: str = ".cache" extra_kwargs = { "cache_seed", "filter_func", @@ -62,6 +62,7 @@ class OpenAIWrapper: "api_type", "tags", } + openai_kwargs = set(inspect.getfullargspec(OpenAI.__init__).kwonlyargs) aopenai_kwargs = set(inspect.getfullargspec(AzureOpenAI.__init__).kwonlyargs) openai_kwargs = openai_kwargs | aopenai_kwargs @@ -208,6 +209,10 @@ def create(self, **config: Any) -> ChatCompletion: - `cache_seed` (int | None) for the cache. Default to 41. An integer cache_seed is useful when implementing "controlled randomness" for the completion. None for no caching. + - `redis_url` (str | None) for the redis cache. Default to None. + A string redis_url formatted like "redis://:password@localhost:6379/0" will turn on the redis cache. + None for no redis cache. If `cache_seed` is None, redis_url will be ignored + You must install redis to use redis cache. - filter_func (Callable | None): A function that takes in the context and the response and returns a boolean to indicate whether the response is valid. E.g., @@ -236,12 +241,13 @@ def yes_or_no_filter(context, response): params = self._construct_create_params(create_config, extra_kwargs) # get the cache_seed, filter_func and context cache_seed = extra_kwargs.get("cache_seed", 41) + redis_url = extra_kwargs.get("redis_url", None) filter_func = extra_kwargs.get("filter_func") context = extra_kwargs.get("context") # Try to load the response from cache if cache_seed is not None: - with diskcache.Cache(f"{self.cache_path_root}/{cache_seed}") as cache: + with cache_factory(f"{cache_seed}", redis_url) as cache: # Try to get the response from cache key = get_key(params) response: ChatCompletion = cache.get(key, None) @@ -278,7 +284,7 @@ def yes_or_no_filter(context, response): self._update_usage_summary(response, use_cache=False) if cache_seed is not None: # Cache the response - with diskcache.Cache(f"{self.cache_path_root}/{cache_seed}") as cache: + with cache_factory(f"{cache_seed}", redis_url) as cache: cache.set(key, response) # check the filter diff --git a/setup.py b/setup.py index 215e88980b63..25a08edabebf 100644 --- a/setup.py +++ b/setup.py @@ -55,6 +55,7 @@ "teachable": ["chromadb"], "lmm": ["replicate", "pillow"], "graphs": ["networkx~=3.2.1", "matplotlib~=3.8.1"], + "redis": ["redis"], }, classifiers=[ "Programming Language :: Python :: 3", diff --git a/test/agentchat/test_cache.py b/test/agentchat/test_cache.py new file mode 100644 index 000000000000..5c23e47e64ca --- /dev/null +++ b/test/agentchat/test_cache.py @@ -0,0 +1,109 @@ +import os +import sys +import time + +import pytest +import autogen +from autogen.agentchat import AssistantAgent, UserProxyAgent +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) +from conftest import skip_openai, skip_redis # noqa: E402 + +try: + 
from openai import OpenAI +except ImportError: + skip = True +else: + skip = False or skip_openai + +try: + import redis +except ImportError: + skip_redis = True +else: + skip_redis = False or skip_redis + +@pytest.mark.skipif(skip, reason="openai not installed OR requested to skip") +def test_disk_cache(human_input_mode="NEVER", max_consecutive_auto_reply=5): + random_cache_seed = int.from_bytes(os.urandom(2), "big") + start_time = time.time() + cold_cache_messages = run_conversation(cache_seed=random_cache_seed, human_input_mode=human_input_mode, max_consecutive_auto_reply=max_consecutive_auto_reply) + end_time = time.time() + duration_with_cold_cache = end_time - start_time + + start_time = time.time() + warm_cache_messages = run_conversation(cache_seed=random_cache_seed, human_input_mode=human_input_mode, max_consecutive_auto_reply=max_consecutive_auto_reply) + end_time = time.time() + duration_with_warm_cache = end_time - start_time + assert cold_cache_messages == warm_cache_messages + assert duration_with_warm_cache < duration_with_cold_cache + +@pytest.mark.skipif(skip_redis, reason="redis not installed OR requested to skip") +def test_redis_cache(human_input_mode="NEVER", max_consecutive_auto_reply=5): + random_cache_seed = int.from_bytes(os.urandom(2), "big") + redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/0") + start_time = time.time() + cold_cache_messages = run_conversation(cache_seed=random_cache_seed, redis_url=redis_url, human_input_mode=human_input_mode, max_consecutive_auto_reply=max_consecutive_auto_reply) + end_time = time.time() + duration_with_cold_cache = end_time - start_time + + start_time = time.time() + warm_cache_messages = run_conversation(cache_seed=random_cache_seed, redis_url=redis_url, human_input_mode=human_input_mode, max_consecutive_auto_reply=max_consecutive_auto_reply) + end_time = time.time() + duration_with_warm_cache = end_time - start_time + assert cold_cache_messages == warm_cache_messages + assert duration_with_warm_cache < duration_with_cold_cache +def run_conversation(cache_seed, redis_url=None, human_input_mode="NEVER", max_consecutive_auto_reply=5): + KEY_LOC = "notebook" + OAI_CONFIG_LIST = "OAI_CONFIG_LIST" + here = os.path.abspath(os.path.dirname(__file__)) + config_list = autogen.config_list_from_json( + OAI_CONFIG_LIST, + file_location=KEY_LOC, + filter_dict={ + "model": { + "gpt-3.5-turbo", + "gpt-35-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-16k-0613", + "gpt-3.5-turbo-0301", + "chatgpt-35-turbo-0301", + "gpt-35-turbo-v0301", + "gpt", + }, + }, + ) + llm_config = { + "cache_seed": cache_seed, + "redis_url": redis_url, + "config_list": config_list, + "max_tokens": 1024, + } + assistant = AssistantAgent( + "coding_agent", + llm_config=llm_config, + ) + user = UserProxyAgent( + "user", + human_input_mode=human_input_mode, + is_termination_msg=lambda x: x.get("content", "").rstrip().endswith("TERMINATE"), + max_consecutive_auto_reply=max_consecutive_auto_reply, + code_execution_config={ + "work_dir": f"{here}/test_agent_scripts", + "use_docker": "python:3", + "timeout": 60, + }, + llm_config=llm_config, + system_message="""Is code provided but not enclosed in ``` blocks? + If so, remind that code blocks need to be enclosed in ``` blocks. + Reply TERMINATE to end the conversation if the task is finished. Don't say appreciation. 
+ If "Thank you" or "You\'re welcome" are said in the conversation, then say TERMINATE and that is your last message.""", + ) + user.initiate_chat(assistant, message="TERMINATE") + # should terminate without sending any message + assert assistant.last_message()["content"] == assistant.last_message(user)["content"] == "TERMINATE" + coding_task = "Print hello world to a file called hello.txt" + + # track how long this takes + user.initiate_chat(assistant, message=coding_task) + return user.chat_messages[list(user.chat_messages.keys())[-0]] + diff --git a/test/conftest.py b/test/conftest.py index 8cf1762b4ae4..fd65deccea7f 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -14,3 +14,5 @@ def pytest_addoption(parser): def pytest_configure(config): global skip_openai skip_openai = config.getoption("--skip-openai", False) + global skip_redis + skip_redis = config.getoption("--skip-redis", False) From 036469784c6aca696f2caa2d7c07773abf2dd159 Mon Sep 17 00:00:00 2001 From: Vijay Ramesh Date: Sat, 13 Jan 2024 13:43:46 -0800 Subject: [PATCH 02/14] PR feedback, add unit tests --- .github/workflows/contrib-openai.yml | 2 +- .github/workflows/contrib-tests.yml | 33 +++++++++++++ autogen/cache/abstract_cache_base.py | 1 + autogen/cache/cache_factory.py | 1 + autogen/cache/redis_cache.py | 3 +- test/agentchat/test_cache.py | 44 +++++++++++++----- test/cache/test_disk_cache.py | 51 ++++++++++++++++++++ test/cache/test_redis_cache.py | 69 ++++++++++++++++++++++++++++ 8 files changed, 191 insertions(+), 13 deletions(-) create mode 100644 test/cache/test_disk_cache.py create mode 100644 test/cache/test_redis_cache.py diff --git a/.github/workflows/contrib-openai.yml b/.github/workflows/contrib-openai.yml index 6a5f7230901e..cbf5d6577a26 100644 --- a/.github/workflows/contrib-openai.yml +++ b/.github/workflows/contrib-openai.yml @@ -261,4 +261,4 @@ jobs: uses: codecov/codecov-action@v3 with: file: ./coverage.xml - flags: unittests \ No newline at end of file + flags: unittests diff --git a/.github/workflows/contrib-tests.yml b/.github/workflows/contrib-tests.yml index 27a616b77190..b2ee4b1c3686 100644 --- a/.github/workflows/contrib-tests.yml +++ b/.github/workflows/contrib-tests.yml @@ -207,3 +207,36 @@ jobs: with: file: ./coverage.xml flags: unittests + CacheTest: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ ubuntu-latest, macos-latest, windows-2019 ] + python-version: [ "3.8", "3.9", "3.10", "3.11" ] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install packages and dependencies for all tests + run: | + python -m pip install --upgrade pip wheel + pip install -e .[test] + pip install -e .[redis] + - name: Test Cache + run: | + pytest test/cache/test_redis_cache.py test/cache/test_disk_cache.py --skip-openai + - name: Coverage + if: matrix.python-version == '3.10' + run: | + pip install coverage>=5.3 + coverage run -a -m pytest test/cache/test_redis_cache.py test/cache/test_disk_cache.py --skip-openai + coverage xml + - name: Upload coverage to Codecov + if: matrix.python-version == '3.10' + uses: codecov/codecov-action@v3 + with: + file: ./coverage.xml + flags: unittests diff --git a/autogen/cache/abstract_cache_base.py b/autogen/cache/abstract_cache_base.py index 81c12a836a91..846d929840b9 100644 --- a/autogen/cache/abstract_cache_base.py +++ b/autogen/cache/abstract_cache_base.py @@ -1,5 +1,6 @@ from abc import ABC, 
abstractmethod + class AbstractCache(ABC): """ Abstract base class for cache implementations. diff --git a/autogen/cache/cache_factory.py b/autogen/cache/cache_factory.py index 84f0b1eea2e9..67d1ba5c1a0b 100644 --- a/autogen/cache/cache_factory.py +++ b/autogen/cache/cache_factory.py @@ -5,6 +5,7 @@ except ImportError: RedisCache = None + def cache_factory(seed, redis_url): """ Factory function for creating cache instances. diff --git a/autogen/cache/redis_cache.py b/autogen/cache/redis_cache.py index 9a730516cd29..88f7a36b9a28 100644 --- a/autogen/cache/redis_cache.py +++ b/autogen/cache/redis_cache.py @@ -2,6 +2,7 @@ import redis from .abstract_cache_base import AbstractCache + class RedisCache(AbstractCache): """ Implementation of AbstractCache using the Redis database. @@ -75,7 +76,7 @@ def set(self, key, value): Notes: The value is serialized using pickle before being stored in Redis. """ - serialized_value = pickle.dumps(value, protocol=0) + serialized_value = pickle.dumps(value) self.cache.set(self._prefixed_key(key), serialized_value) def close(self): diff --git a/test/agentchat/test_cache.py b/test/agentchat/test_cache.py index 5c23e47e64ca..9f958e81d568 100644 --- a/test/agentchat/test_cache.py +++ b/test/agentchat/test_cache.py @@ -5,53 +5,76 @@ import pytest import autogen from autogen.agentchat import AssistantAgent, UserProxyAgent + sys.path.append(os.path.join(os.path.dirname(__file__), "..")) from conftest import skip_openai, skip_redis # noqa: E402 try: from openai import OpenAI except ImportError: - skip = True + skip_openai_tests = True else: - skip = False or skip_openai + skip_openai_tests = False or skip_openai try: import redis except ImportError: - skip_redis = True + skip_redis_tests = True else: - skip_redis = False or skip_redis + skip_redis_tests = False or skip_redis + -@pytest.mark.skipif(skip, reason="openai not installed OR requested to skip") +@pytest.mark.skipif(skip_openai_tests, reason="openai not installed OR requested to skip") def test_disk_cache(human_input_mode="NEVER", max_consecutive_auto_reply=5): random_cache_seed = int.from_bytes(os.urandom(2), "big") start_time = time.time() - cold_cache_messages = run_conversation(cache_seed=random_cache_seed, human_input_mode=human_input_mode, max_consecutive_auto_reply=max_consecutive_auto_reply) + cold_cache_messages = run_conversation( + cache_seed=random_cache_seed, + human_input_mode=human_input_mode, + max_consecutive_auto_reply=max_consecutive_auto_reply, + ) end_time = time.time() duration_with_cold_cache = end_time - start_time start_time = time.time() - warm_cache_messages = run_conversation(cache_seed=random_cache_seed, human_input_mode=human_input_mode, max_consecutive_auto_reply=max_consecutive_auto_reply) + warm_cache_messages = run_conversation( + cache_seed=random_cache_seed, + human_input_mode=human_input_mode, + max_consecutive_auto_reply=max_consecutive_auto_reply, + ) end_time = time.time() duration_with_warm_cache = end_time - start_time assert cold_cache_messages == warm_cache_messages assert duration_with_warm_cache < duration_with_cold_cache -@pytest.mark.skipif(skip_redis, reason="redis not installed OR requested to skip") + +@pytest.mark.skipif(skip_openai_tests or skip_redis_tests, reason="redis not installed OR requested to skip") def test_redis_cache(human_input_mode="NEVER", max_consecutive_auto_reply=5): random_cache_seed = int.from_bytes(os.urandom(2), "big") redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/0") start_time = time.time() - cold_cache_messages = 
run_conversation(cache_seed=random_cache_seed, redis_url=redis_url, human_input_mode=human_input_mode, max_consecutive_auto_reply=max_consecutive_auto_reply) + cold_cache_messages = run_conversation( + cache_seed=random_cache_seed, + redis_url=redis_url, + human_input_mode=human_input_mode, + max_consecutive_auto_reply=max_consecutive_auto_reply, + ) end_time = time.time() duration_with_cold_cache = end_time - start_time start_time = time.time() - warm_cache_messages = run_conversation(cache_seed=random_cache_seed, redis_url=redis_url, human_input_mode=human_input_mode, max_consecutive_auto_reply=max_consecutive_auto_reply) + warm_cache_messages = run_conversation( + cache_seed=random_cache_seed, + redis_url=redis_url, + human_input_mode=human_input_mode, + max_consecutive_auto_reply=max_consecutive_auto_reply, + ) end_time = time.time() duration_with_warm_cache = end_time - start_time assert cold_cache_messages == warm_cache_messages assert duration_with_warm_cache < duration_with_cold_cache + + def run_conversation(cache_seed, redis_url=None, human_input_mode="NEVER", max_consecutive_auto_reply=5): KEY_LOC = "notebook" OAI_CONFIG_LIST = "OAI_CONFIG_LIST" @@ -106,4 +129,3 @@ def run_conversation(cache_seed, redis_url=None, human_input_mode="NEVER", max_c # track how long this takes user.initiate_chat(assistant, message=coding_task) return user.chat_messages[list(user.chat_messages.keys())[-0]] - diff --git a/test/cache/test_disk_cache.py b/test/cache/test_disk_cache.py new file mode 100644 index 000000000000..a8f8240588e1 --- /dev/null +++ b/test/cache/test_disk_cache.py @@ -0,0 +1,51 @@ +import unittest +from unittest.mock import patch, MagicMock +from autogen.cache.disk_cache import DiskCache + + +class TestDiskCache(unittest.TestCase): + def setUp(self): + self.seed = "test_seed" + + @patch("autogen.cache.disk_cache.diskcache.Cache", return_value=MagicMock()) + def test_init(self, mock_cache): + cache = DiskCache(self.seed) + self.assertIsInstance(cache.cache, MagicMock) + mock_cache.assert_called_with(self.seed) + + @patch("autogen.cache.disk_cache.diskcache.Cache", return_value=MagicMock()) + def test_get(self, mock_cache): + key = "key" + value = "value" + cache = DiskCache(self.seed) + cache.cache.get.return_value = value + self.assertEqual(cache.get(key), value) + cache.cache.get.assert_called_with(key, None) + + cache.cache.get.return_value = None + self.assertIsNone(cache.get(key, None)) + + @patch("autogen.cache.disk_cache.diskcache.Cache", return_value=MagicMock()) + def test_set(self, mock_cache): + key = "key" + value = "value" + cache = DiskCache(self.seed) + cache.set(key, value) + cache.cache.set.assert_called_with(key, value) + + @patch("autogen.cache.disk_cache.diskcache.Cache", return_value=MagicMock()) + def test_context_manager(self, mock_cache): + with DiskCache(self.seed) as cache: + self.assertIsInstance(cache, DiskCache) + mock_cache_instance = cache.cache + mock_cache_instance.close.assert_called() + + @patch("autogen.cache.disk_cache.diskcache.Cache", return_value=MagicMock()) + def test_close(self, mock_cache): + cache = DiskCache(self.seed) + cache.close() + cache.cache.close.assert_called() + + +if __name__ == "__main__": + unittest.main() diff --git a/test/cache/test_redis_cache.py b/test/cache/test_redis_cache.py new file mode 100644 index 000000000000..d40fc9459f44 --- /dev/null +++ b/test/cache/test_redis_cache.py @@ -0,0 +1,69 @@ +import unittest +import pickle +from unittest.mock import patch, MagicMock + +import pytest + +try: + from 
autogen.cache.redis_cache import RedisCache + + skip_redis_tests = False +except ImportError: + skip_redis_tests = True + + +class TestRedisCache(unittest.TestCase): + def setUp(self): + self.seed = "test_seed" + self.redis_url = "redis://localhost:6379/0" + + @pytest.mark.skipif(skip_redis_tests, reason="redis not installed") + @patch("autogen.cache.redis_cache.redis.Redis.from_url", return_value=MagicMock()) + def test_init(self, mock_redis_from_url): + cache = RedisCache(self.seed, self.redis_url) + self.assertEqual(cache.seed, self.seed) + mock_redis_from_url.assert_called_with(self.redis_url) + + @pytest.mark.skipif(skip_redis_tests, reason="redis not installed") + @patch("autogen.cache.redis_cache.redis.Redis.from_url", return_value=MagicMock()) + def test_prefixed_key(self, mock_redis_from_url): + cache = RedisCache(self.seed, self.redis_url) + key = "test_key" + expected_prefixed_key = f"autogen:{self.seed}:{key}" + self.assertEqual(cache._prefixed_key(key), expected_prefixed_key) + + @pytest.mark.skipif(skip_redis_tests, reason="redis not installed") + @patch("autogen.cache.redis_cache.redis.Redis.from_url", return_value=MagicMock()) + def test_get(self, mock_redis_from_url): + key = "key" + value = "value" + serialized_value = pickle.dumps(value) + cache = RedisCache(self.seed, self.redis_url) + cache.cache.get.return_value = serialized_value + self.assertEqual(cache.get(key), value) + cache.cache.get.assert_called_with(f"autogen:{self.seed}:{key}") + + cache.cache.get.return_value = None + self.assertIsNone(cache.get(key)) + + @pytest.mark.skipif(skip_redis_tests, reason="redis not installed") + @patch("autogen.cache.redis_cache.redis.Redis.from_url", return_value=MagicMock()) + def test_set(self, mock_redis_from_url): + key = "key" + value = "value" + serialized_value = pickle.dumps(value) + cache = RedisCache(self.seed, self.redis_url) + cache.set(key, value) + cache.cache.set.assert_called_with(f"autogen:{self.seed}:{key}", serialized_value) + + @pytest.mark.skipif(skip_redis_tests, reason="redis not installed") + @patch("autogen.cache.redis_cache.redis.Redis.from_url", return_value=MagicMock()) + def test_context_manager(self, mock_redis_from_url): + with RedisCache(self.seed, self.redis_url) as cache: + self.assertIsInstance(cache, RedisCache) + mock_redis_instance = cache.cache + mock_redis_instance.close.assert_called() + + +if __name__ == "__main__": + unittest.main() From 8ac270f58fd201294463c15fef652491d68afa5f Mon Sep 17 00:00:00 2001 From: Vijay Ramesh Date: Sun, 14 Jan 2024 13:42:29 -0800 Subject: [PATCH 03/14] more PR feedback, move the new style cache to a context manager --- .github/workflows/contrib-openai.yml | 2 +- .github/workflows/contrib-tests.yml | 4 +- autogen/agentchat/conversable_agent.py | 9 + autogen/agentchat/groupchat.py | 3 + autogen/cache/cache.py | 137 +++++++++++++++ autogen/cache/cache_factory.py | 63 +++---- autogen/oai/client.py | 27 +-- test/agentchat/test_cache.py | 131 -------------- test/agentchat/test_cache_agent.py | 228 +++++++++++++++++++++++++ test/cache/test_cache.py | 53 ++++++ website/docs/Use-Cases/agent_chat.md | 29 ++++ 11 files changed, 509 insertions(+), 177 deletions(-) create mode 100644 autogen/cache/cache.py delete mode 100644 test/agentchat/test_cache.py create mode 100644 test/agentchat/test_cache_agent.py create mode 100644 test/cache/test_cache.py diff --git a/.github/workflows/contrib-openai.yml b/.github/workflows/contrib-openai.yml index cbf5d6577a26..7a8edd874468 100644 --- a/.github/workflows/contrib-openai.yml 
+++ b/.github/workflows/contrib-openai.yml @@ -255,7 +255,7 @@ jobs: OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }} run: | pip install coverage>=5.3 - coverage run -a -m pytest test/agentchat/test_cache.py + coverage run -a -m pytest test/agentchat/test_cache_agent.py coverage xml - name: Upload coverage to Codecov uses: codecov/codecov-action@v3 diff --git a/.github/workflows/contrib-tests.yml b/.github/workflows/contrib-tests.yml index b2ee4b1c3686..4c98975a242b 100644 --- a/.github/workflows/contrib-tests.yml +++ b/.github/workflows/contrib-tests.yml @@ -227,12 +227,12 @@ jobs: pip install -e .[redis] - name: Test Cache run: | - pytest test/cache/test_redis_cache.py test/cache/test_disk_cache.py --skip-openai + pytest test/cache/test_cache.py test/cache/test_redis_cache.py test/cache/test_disk_cache.py --skip-openai - name: Coverage if: matrix.python-version == '3.10' run: | pip install coverage>=5.3 - coverage run -a -m pytest test/cache/test_redis_cache.py test/cache/test_disk_cache.py --skip-openai + coverage run -a -m pytest test/cache/test_cache.py test/cache/test_redis_cache.py test/cache/test_disk_cache.py --skip-openai coverage xml - name: Upload coverage to Codecov if: matrix.python-version == '3.10' diff --git a/autogen/agentchat/conversable_agent.py b/autogen/agentchat/conversable_agent.py index 919839b6c393..26b3c61ffbf1 100644 --- a/autogen/agentchat/conversable_agent.py +++ b/autogen/agentchat/conversable_agent.py @@ -9,6 +9,7 @@ from typing import Any, Awaitable, Callable, Dict, List, Literal, Optional, Tuple, Type, TypeVar, Union from .. import OpenAIWrapper +from ..cache.cache import Cache from ..code_utils import DEFAULT_MODEL, UNKNOWN, content_str, execute_code, extract_code, infer_lang from ..function_utils import get_function_schema, load_basemodels_if_needed, serialize_to_str from .agent import Agent @@ -125,6 +126,9 @@ def __init__( self.llm_config.update(llm_config) self.client = OpenAIWrapper(**self.llm_config) + # initialize standalone cache client + self.cache_client = None + self._code_execution_config: Union[Dict, Literal[False]] = ( {} if code_execution_config is None else code_execution_config ) @@ -648,6 +652,7 @@ def initiate_chat( recipient: "ConversableAgent", clear_history: Optional[bool] = True, silent: Optional[bool] = False, + cache_client: Optional[Cache] = None, **context, ): """Initiate a chat with the recipient agent. @@ -660,6 +665,7 @@ def initiate_chat( recipient: the recipient agent. clear_history (bool): whether to clear the chat history with the agent. silent (bool or None): (Experimental) whether to print the messages for this conversation. + cache_client (Cache or None): the cache client to be used for this conversation. **context: any context information. "message" needs to be provided if the `generate_init_message` method is not overridden. 
@@ -669,6 +675,8 @@ def initiate_chat( for agent in [self, recipient]: agent._raise_exception_on_async_reply_functions() self._prepare_chat(recipient, clear_history) + self.cache_client = cache_client + recipient.cache_client = cache_client self.send(self.generate_init_message(**context), recipient, silent=silent) async def a_initiate_chat( @@ -758,6 +766,7 @@ def generate_oai_reply( all_messages.append(message) # TODO: #1143 handle token limit exceeded error + client.cache = self.cache_client response = client.create( context=messages[-1].pop("context", None), messages=self._oai_system_message + all_messages ) diff --git a/autogen/agentchat/groupchat.py b/autogen/agentchat/groupchat.py index 501da7b41131..2006b2169e64 100644 --- a/autogen/agentchat/groupchat.py +++ b/autogen/agentchat/groupchat.py @@ -356,6 +356,9 @@ def run_chat( message = messages[-1] speaker = sender groupchat = config + if self.cache_client is not None: + for a in groupchat.agents: + a.cache_client = self.cache_client for i in range(groupchat.max_round): groupchat.append(message, speaker) if self._is_termination_msg(message): diff --git a/autogen/cache/cache.py b/autogen/cache/cache.py new file mode 100644 index 000000000000..fbcfb9a9fc36 --- /dev/null +++ b/autogen/cache/cache.py @@ -0,0 +1,137 @@ +import os +from typing import Dict, Any + +from autogen.cache.cache_factory import CacheFactory + + +class Cache: + """ + A wrapper class for managing cache configuration and instances. + + This class provides a unified interface for creating and interacting with + different types of cache (e.g., Redis, Disk). It abstracts the underlying + cache implementation details, providing methods for cache operations. + + Attributes: + config (Dict[str, Any]): A dictionary containing cache configuration. + cache: The cache instance created based on the provided configuration. + + Methods: + redis(cache_seed=42, redis_url="redis://localhost:6379/0"): Static method to create a Redis cache instance. + disk(cache_seed=42, cache_path_root=".cache"): Static method to create a Disk cache instance. + __init__(self, config): Initializes the Cache with the given configuration. + __enter__(self): Context management entry, returning the cache instance. + __exit__(self, exc_type, exc_value, traceback): Context management exit. + get(self, key, default=None): Retrieves an item from the cache. + set(self, key, value): Sets an item in the cache. + close(self): Closes the cache. + """ + + ALLOWED_CONFIG_KEYS = ["cache_seed", "redis_url", "cache_path_root"] + + @staticmethod + def redis(cache_seed=42, redis_url="redis://localhost:6379/0"): + """ + Create a Redis cache instance. + + Args: + cache_seed (int, optional): A seed for the cache. Defaults to 42. + redis_url (str, optional): The URL for the Redis server. Defaults to "redis://localhost:6379/0". + + Returns: + Cache: A Cache instance configured for Redis. + """ + return Cache({"cache_seed": cache_seed, "redis_url": redis_url}) + + @staticmethod + def disk(cache_seed=42, cache_path_root=".cache"): + """ + Create a Disk cache instance. + + Args: + cache_seed (int, optional): A seed for the cache. Defaults to 42. + cache_path_root (str, optional): The root path for the disk cache. Defaults to ".cache". + + Returns: + Cache: A Cache instance configured for Disk caching. + """ + return Cache({"cache_seed": cache_seed, "cache_path_root": cache_path_root}) + + def __init__(self, config: Dict[str, Any]): + """ + Initialize the Cache with the given configuration. 
+ + Validates the configuration keys and creates the cache instance. + + Args: + config (Dict[str, Any]): A dictionary containing the cache configuration. + + Raises: + ValueError: If an invalid configuration key is provided. + """ + self.config = config + # validate config + for key in self.config.keys(): + if key not in self.ALLOWED_CONFIG_KEYS: + raise ValueError(f"Invalid config key: {key}") + # create cache instance + self.cache = CacheFactory.cache_factory( + self.config.get("cache_seed", "42"), + self.config.get("redis_url", None), + self.config.get("cache_path_root", None), + ) + + def __enter__(self): + """ + Enter the runtime context related to the cache object. + + Returns: + The cache instance for use within a context block. + """ + return self.cache.__enter__() + + def __exit__(self, exc_type, exc_value, traceback): + """ + Exit the runtime context related to the cache object. + + Cleans up the cache instance and handles any exceptions that occurred + within the context. + + Args: + exc_type: The exception type if an exception was raised in the context. + exc_value: The exception value if an exception was raised in the context. + traceback: The traceback if an exception was raised in the context. + """ + return self.cache.__exit__(exc_type, exc_value, traceback) + + def get(self, key, default=None): + """ + Retrieve an item from the cache. + + Args: + key (str): The key identifying the item in the cache. + default (optional): The default value to return if the key is not found. + Defaults to None. + + Returns: + The value associated with the key if found, else the default value. + """ + return self.cache.get(key, default) + + def set(self, key, value): + """ + Set an item in the cache. + + Args: + key (str): The key under which the item is to be stored. + value: The value to be stored in the cache. + """ + self.cache.set(key, value) + + def close(self): + """ + Close the cache. + + Perform any necessary cleanup, such as closing connections or releasing resources. + """ + self.cache.close() diff --git a/autogen/cache/cache_factory.py b/autogen/cache/cache_factory.py index 67d1ba5c1a0b..3917fa2da1b6 100644 --- a/autogen/cache/cache_factory.py +++ b/autogen/cache/cache_factory.py @@ -6,34 +6,35 @@ RedisCache = None -def cache_factory(seed, redis_url): - """ - Factory function for creating cache instances. - - Based on the provided redis_url, this function decides whether to create a RedisCache - or DiskCache instance. If RedisCache is available and redis_url is provided, - a RedisCache instance is created. Otherwise, a DiskCache instance is used. - - Args: - seed (str): A string used as a seed or namespace for the cache. - This could be useful for creating distinct cache instances - or for namespacing keys in the cache. - redis_url (str or None): The URL for the Redis server. If this is None - or if RedisCache is not available, a DiskCache instance is created. - - Returns: - An instance of either RedisCache or DiskCache, depending on the availability of RedisCache - and the provided redis_url. 
- - Examples: - Creating a Redis cache - > redis_cache = cache_factory("myseed", "redis://localhost:6379/0") - - Creating a Disk cache - > disk_cache = cache_factory("myseed", None) - """ - if RedisCache is not None and redis_url is not None: - return RedisCache(seed, redis_url) - else: - cache_path_root: str = ".cache" - return DiskCache(f"./{cache_path_root}/{seed}") +class CacheFactory: + @staticmethod + def cache_factory(seed, redis_url=None, cache_path_root=".cache"): + """ + Factory function for creating cache instances. + + Based on the provided redis_url, this function decides whether to create a RedisCache + or DiskCache instance. If RedisCache is available and redis_url is provided, + a RedisCache instance is created. Otherwise, a DiskCache instance is used. + + Args: + seed (str): A string used as a seed or namespace for the cache. + This could be useful for creating distinct cache instances + or for namespacing keys in the cache. + redis_url (str or None): The URL for the Redis server. If this is None + or if RedisCache is not available, a DiskCache instance is created. + + Returns: + An instance of either RedisCache or DiskCache, depending on the availability of RedisCache + and the provided redis_url. + + Examples: + Creating a Redis cache + > redis_cache = cache_factory("myseed", "redis://localhost:6379/0") + + Creating a Disk cache + > disk_cache = cache_factory("myseed", None) + """ + if RedisCache is not None and redis_url is not None: + return RedisCache(seed, redis_url) + else: + return DiskCache(f"./{cache_path_root}/{seed}") diff --git a/autogen/oai/client.py b/autogen/oai/client.py index 45d551ad8a2c..ab4c04816acf 100644 --- a/autogen/oai/client.py +++ b/autogen/oai/client.py @@ -8,6 +8,8 @@ from flaml.automl.logger import logger_formatter from pydantic import BaseModel + +from autogen.cache.cache import Cache from autogen.oai import completion from autogen.oai.openai_utils import DEFAULT_AZURE_API_VERSION, get_key, OAI_PRICE1K @@ -35,9 +37,6 @@ from openai.types.completion import Completion from openai.types.completion_usage import CompletionUsage - # cache wrapper - from autogen.cache.cache_factory import cache_factory - if openai.__version__ >= "1.1.0": TOOL_ENABLED = True ERROR = None @@ -68,6 +67,7 @@ class OpenAIWrapper: openai_kwargs = openai_kwargs | aopenai_kwargs total_usage_summary: Optional[Dict[str, Any]] = None actual_usage_summary: Optional[Dict[str, Any]] = None + cache: Optional[Cache] = None def __init__(self, *, config_list: Optional[List[Dict[str, Any]]] = None, **base_config: Any): """ @@ -209,10 +209,7 @@ def create(self, **config: Any) -> ChatCompletion: - `cache_seed` (int | None) for the cache. Default to 41. An integer cache_seed is useful when implementing "controlled randomness" for the completion. None for no caching. - - `redis_url` (str | None) for the redis cache. Default to None. - A string redis_url formatted like "redis://:password@localhost:6379/0" will turn on the redis cache. - None for no redis cache. If `cache_seed` is None, redis_url will be ignored - You must install redis to use redis cache. + This is a legacy parameter. See [cache](/docs/Use-Cases/agent_chat#llmcaching) for more details. - filter_func (Callable | None): A function that takes in the context and the response and returns a boolean to indicate whether the response is valid. 
E.g., @@ -241,13 +238,19 @@ def yes_or_no_filter(context, response): params = self._construct_create_params(create_config, extra_kwargs) # get the cache_seed, filter_func and context cache_seed = extra_kwargs.get("cache_seed", 41) - redis_url = extra_kwargs.get("redis_url", None) filter_func = extra_kwargs.get("filter_func") context = extra_kwargs.get("context") - # Try to load the response from cache + cache_client = None if cache_seed is not None: - with cache_factory(f"{cache_seed}", redis_url) as cache: + # Legacy cache behavior, if cache_seed is in the llm_config, use disk cache + cache_client = Cache.disk(cache_seed, ".cache") + elif self.cache is not None: + # Otherwise if they have passed in a cache, use that + cache_client = self.cache + + if cache_client is not None: + with cache_client as cache: # Try to get the response from cache key = get_key(params) response: ChatCompletion = cache.get(key, None) @@ -282,9 +285,9 @@ def yes_or_no_filter(context, response): # add cost calculation before caching no matter filter is passed or not response.cost = self.cost(response) self._update_usage_summary(response, use_cache=False) - if cache_seed is not None: + if cache_client is not None: # Cache the response - with cache_factory(f"{cache_seed}", redis_url) as cache: + with cache_client as cache: cache.set(key, response) # check the filter diff --git a/test/agentchat/test_cache.py b/test/agentchat/test_cache.py deleted file mode 100644 index 9f958e81d568..000000000000 --- a/test/agentchat/test_cache.py +++ /dev/null @@ -1,131 +0,0 @@ -import os -import sys -import time - -import pytest -import autogen -from autogen.agentchat import AssistantAgent, UserProxyAgent - -sys.path.append(os.path.join(os.path.dirname(__file__), "..")) -from conftest import skip_openai, skip_redis # noqa: E402 - -try: - from openai import OpenAI -except ImportError: - skip_openai_tests = True -else: - skip_openai_tests = False or skip_openai - -try: - import redis -except ImportError: - skip_redis_tests = True -else: - skip_redis_tests = False or skip_redis - - -@pytest.mark.skipif(skip_openai_tests, reason="openai not installed OR requested to skip") -def test_disk_cache(human_input_mode="NEVER", max_consecutive_auto_reply=5): - random_cache_seed = int.from_bytes(os.urandom(2), "big") - start_time = time.time() - cold_cache_messages = run_conversation( - cache_seed=random_cache_seed, - human_input_mode=human_input_mode, - max_consecutive_auto_reply=max_consecutive_auto_reply, - ) - end_time = time.time() - duration_with_cold_cache = end_time - start_time - - start_time = time.time() - warm_cache_messages = run_conversation( - cache_seed=random_cache_seed, - human_input_mode=human_input_mode, - max_consecutive_auto_reply=max_consecutive_auto_reply, - ) - end_time = time.time() - duration_with_warm_cache = end_time - start_time - assert cold_cache_messages == warm_cache_messages - assert duration_with_warm_cache < duration_with_cold_cache - - -@pytest.mark.skipif(skip_openai_tests or skip_redis_tests, reason="redis not installed OR requested to skip") -def test_redis_cache(human_input_mode="NEVER", max_consecutive_auto_reply=5): - random_cache_seed = int.from_bytes(os.urandom(2), "big") - redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/0") - start_time = time.time() - cold_cache_messages = run_conversation( - cache_seed=random_cache_seed, - redis_url=redis_url, - human_input_mode=human_input_mode, - max_consecutive_auto_reply=max_consecutive_auto_reply, - ) - end_time = time.time() - 
duration_with_cold_cache = end_time - start_time - - start_time = time.time() - warm_cache_messages = run_conversation( - cache_seed=random_cache_seed, - redis_url=redis_url, - human_input_mode=human_input_mode, - max_consecutive_auto_reply=max_consecutive_auto_reply, - ) - end_time = time.time() - duration_with_warm_cache = end_time - start_time - assert cold_cache_messages == warm_cache_messages - assert duration_with_warm_cache < duration_with_cold_cache - - -def run_conversation(cache_seed, redis_url=None, human_input_mode="NEVER", max_consecutive_auto_reply=5): - KEY_LOC = "notebook" - OAI_CONFIG_LIST = "OAI_CONFIG_LIST" - here = os.path.abspath(os.path.dirname(__file__)) - config_list = autogen.config_list_from_json( - OAI_CONFIG_LIST, - file_location=KEY_LOC, - filter_dict={ - "model": { - "gpt-3.5-turbo", - "gpt-35-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-16k-0613", - "gpt-3.5-turbo-0301", - "chatgpt-35-turbo-0301", - "gpt-35-turbo-v0301", - "gpt", - }, - }, - ) - llm_config = { - "cache_seed": cache_seed, - "redis_url": redis_url, - "config_list": config_list, - "max_tokens": 1024, - } - assistant = AssistantAgent( - "coding_agent", - llm_config=llm_config, - ) - user = UserProxyAgent( - "user", - human_input_mode=human_input_mode, - is_termination_msg=lambda x: x.get("content", "").rstrip().endswith("TERMINATE"), - max_consecutive_auto_reply=max_consecutive_auto_reply, - code_execution_config={ - "work_dir": f"{here}/test_agent_scripts", - "use_docker": "python:3", - "timeout": 60, - }, - llm_config=llm_config, - system_message="""Is code provided but not enclosed in ``` blocks? - If so, remind that code blocks need to be enclosed in ``` blocks. - Reply TERMINATE to end the conversation if the task is finished. Don't say appreciation. 
- If "Thank you" or "You\'re welcome" are said in the conversation, then say TERMINATE and that is your last message.""", - ) - user.initiate_chat(assistant, message="TERMINATE") - # should terminate without sending any message - assert assistant.last_message()["content"] == assistant.last_message(user)["content"] == "TERMINATE" - coding_task = "Print hello world to a file called hello.txt" - - # track how long this takes - user.initiate_chat(assistant, message=coding_task) - return user.chat_messages[list(user.chat_messages.keys())[-0]] diff --git a/test/agentchat/test_cache_agent.py b/test/agentchat/test_cache_agent.py new file mode 100644 index 000000000000..218e275e272c --- /dev/null +++ b/test/agentchat/test_cache_agent.py @@ -0,0 +1,228 @@ +import os +import sys +import time + +import pytest +import autogen +from autogen.agentchat import AssistantAgent, UserProxyAgent +from autogen.cache.cache import Cache + +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) +from conftest import skip_openai, skip_redis # noqa: E402 + +try: + from openai import OpenAI +except ImportError: + skip_openai_tests = True +else: + skip_openai_tests = False or skip_openai + +try: + import redis +except ImportError: + skip_redis_tests = True +else: + skip_redis_tests = False or skip_redis + + +@pytest.mark.skipif(skip_openai_tests, reason="openai not installed OR requested to skip") +def test_legacy_disk_cache(): + random_cache_seed = int.from_bytes(os.urandom(2), "big") + start_time = time.time() + cold_cache_messages = run_conversation( + cache_seed=random_cache_seed, + ) + end_time = time.time() + duration_with_cold_cache = end_time - start_time + + start_time = time.time() + warm_cache_messages = run_conversation( + cache_seed=random_cache_seed, + ) + end_time = time.time() + duration_with_warm_cache = end_time - start_time + assert cold_cache_messages == warm_cache_messages + assert duration_with_warm_cache < duration_with_cold_cache + + +@pytest.mark.skipif(skip_openai_tests or skip_redis_tests, reason="redis not installed OR requested to skip") +def test_redis_cache(): + random_cache_seed = int.from_bytes(os.urandom(2), "big") + redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/0") + start_time = time.time() + with Cache.redis(random_cache_seed, redis_url) as cache_client: + cold_cache_messages = run_conversation(cache_seed=None, cache=cache_client) + end_time = time.time() + duration_with_cold_cache = end_time - start_time + + start_time = time.time() + warm_cache_messages = run_conversation(cache_seed=None, cache=cache_client) + end_time = time.time() + duration_with_warm_cache = end_time - start_time + assert cold_cache_messages == warm_cache_messages + assert duration_with_warm_cache < duration_with_cold_cache + + random_cache_seed = int.from_bytes(os.urandom(2), "big") + with Cache.redis(random_cache_seed, redis_url) as cache_client: + cold_cache_messages = run_groupchat_conversation(cache=cache_client) + end_time = time.time() + duration_with_cold_cache = end_time - start_time + + start_time = time.time() + warm_cache_messages = run_groupchat_conversation(cache=cache_client) + end_time = time.time() + duration_with_warm_cache = end_time - start_time + assert cold_cache_messages == warm_cache_messages + assert duration_with_warm_cache < duration_with_cold_cache + + +@pytest.mark.skipif(skip_openai_tests, reason="openai not installed OR requested to skip") +def test_disk_cache(): + random_cache_seed = int.from_bytes(os.urandom(2), "big") + start_time = time.time() + with 
Cache.disk(random_cache_seed) as cache_client: + cold_cache_messages = run_conversation(cache_seed=None, cache=cache_client) + end_time = time.time() + duration_with_cold_cache = end_time - start_time + + start_time = time.time() + warm_cache_messages = run_conversation(cache_seed=None, cache=cache_client) + end_time = time.time() + duration_with_warm_cache = end_time - start_time + assert cold_cache_messages == warm_cache_messages + assert duration_with_warm_cache < duration_with_cold_cache + + random_cache_seed = int.from_bytes(os.urandom(2), "big") + with Cache.disk(random_cache_seed) as cache_client: + cold_cache_messages = run_groupchat_conversation(cache=cache_client) + end_time = time.time() + duration_with_cold_cache = end_time - start_time + + start_time = time.time() + warm_cache_messages = run_groupchat_conversation(cache=cache_client) + end_time = time.time() + duration_with_warm_cache = end_time - start_time + assert cold_cache_messages == warm_cache_messages + assert duration_with_warm_cache < duration_with_cold_cache + + +def run_conversation(cache_seed, human_input_mode="NEVER", max_consecutive_auto_reply=5, cache=None): + KEY_LOC = "notebook" + OAI_CONFIG_LIST = "OAI_CONFIG_LIST" + here = os.path.abspath(os.path.dirname(__file__)) + config_list = autogen.config_list_from_json( + OAI_CONFIG_LIST, + file_location=KEY_LOC, + filter_dict={ + "model": { + "gpt-3.5-turbo", + "gpt-35-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-16k-0613", + "gpt-3.5-turbo-0301", + "chatgpt-35-turbo-0301", + "gpt-35-turbo-v0301", + "gpt", + }, + }, + ) + llm_config = { + "cache_seed": cache_seed, + "config_list": config_list, + "max_tokens": 1024, + } + assistant = AssistantAgent( + "coding_agent", + llm_config=llm_config, + ) + user = UserProxyAgent( + "user", + human_input_mode=human_input_mode, + is_termination_msg=lambda x: x.get("content", "").rstrip().endswith("TERMINATE"), + max_consecutive_auto_reply=max_consecutive_auto_reply, + code_execution_config={ + "work_dir": f"{here}/test_agent_scripts", + "use_docker": "python:3", + "timeout": 60, + }, + llm_config=llm_config, + system_message="""Is code provided but not enclosed in ``` blocks? + If so, remind that code blocks need to be enclosed in ``` blocks. + Reply TERMINATE to end the conversation if the task is finished. Don't say appreciation. 
+ If "Thank you" or "You\'re welcome" are said in the conversation, then say TERMINATE and that is your last message.""", + ) + + user.initiate_chat(assistant, message="TERMINATE", cache_client=cache) + # should terminate without sending any message + assert assistant.last_message()["content"] == assistant.last_message(user)["content"] == "TERMINATE" + coding_task = "Print hello world to a file called hello.txt" + + # track how long this takes + user.initiate_chat(assistant, message=coding_task, cache_client=cache) + return user.chat_messages[list(user.chat_messages.keys())[-0]] + + +def run_groupchat_conversation(cache, human_input_mode="NEVER", max_consecutive_auto_reply=5): + KEY_LOC = "notebook" + OAI_CONFIG_LIST = "OAI_CONFIG_LIST" + here = os.path.abspath(os.path.dirname(__file__)) + config_list = autogen.config_list_from_json( + OAI_CONFIG_LIST, + file_location=KEY_LOC, + filter_dict={ + "model": { + "gpt-3.5-turbo", + "gpt-35-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-16k-0613", + "gpt-3.5-turbo-0301", + "chatgpt-35-turbo-0301", + "gpt-35-turbo-v0301", + "gpt", + }, + }, + ) + llm_config = { + "cache_seed": None, + "config_list": config_list, + "max_tokens": 1024, + } + assistant = AssistantAgent( + "coding_agent", + llm_config=llm_config, + ) + + planner = AssistantAgent( + "planner", + llm_config=llm_config, + ) + + user = UserProxyAgent( + "user", + human_input_mode=human_input_mode, + is_termination_msg=lambda x: x.get("content", "").rstrip().endswith("TERMINATE"), + max_consecutive_auto_reply=max_consecutive_auto_reply, + code_execution_config={ + "work_dir": f"{here}/test_agent_scripts", + "use_docker": "python:3", + "timeout": 60, + }, + llm_config=llm_config, + system_message="""Is code provided but not enclosed in ``` blocks? + If so, remind that code blocks need to be enclosed in ``` blocks. + Reply TERMINATE to end the conversation if the task is finished. Don't say appreciation. 
+ If "Thank you" or "You\'re welcome" are said in the conversation, then say TERMINATE and that is your last message.""", + ) + + group_chat = autogen.GroupChat( + agents=[planner, assistant, user], + messages=[], + max_round=4, + speaker_selection_method="round_robin", + ) + manager = autogen.GroupChatManager(groupchat=group_chat, llm_config=llm_config) + + coding_task = "Print hello world to a file called hello.txt" + + user.initiate_chat(manager, message=coding_task, cache_client=cache) + return user.chat_messages[list(user.chat_messages.keys())[-0]] diff --git a/test/cache/test_cache.py b/test/cache/test_cache.py new file mode 100644 index 000000000000..ba1a0befeaaa --- /dev/null +++ b/test/cache/test_cache.py @@ -0,0 +1,53 @@ +import unittest +from unittest.mock import patch, MagicMock +from autogen.cache.cache import Cache + + +class TestCache(unittest.TestCase): + def setUp(self): + self.config = {"cache_seed": "test_seed", "redis_url": "redis://test", "cache_path_root": ".test_cache"} + + @patch("autogen.cache.cache_factory.CacheFactory.cache_factory", return_value=MagicMock()) + def test_init(self, mock_cache_factory): + cache = Cache(self.config) + self.assertIsInstance(cache.cache, MagicMock) + mock_cache_factory.assert_called_with("test_seed", "redis://test", ".test_cache") + + @patch("autogen.cache.cache_factory.CacheFactory.cache_factory", return_value=MagicMock()) + def test_context_manager(self, mock_cache_factory): + mock_cache_instance = MagicMock() + mock_cache_factory.return_value = mock_cache_instance + + with Cache(self.config) as cache: + self.assertIsInstance(cache, MagicMock) + + mock_cache_instance.__enter__.assert_called() + mock_cache_instance.__exit__.assert_called() + + @patch("autogen.cache.cache_factory.CacheFactory.cache_factory", return_value=MagicMock()) + def test_get_set(self, mock_cache_factory): + key = "key" + value = "value" + mock_cache_instance = MagicMock() + mock_cache_factory.return_value = mock_cache_instance + + cache = Cache(self.config) + cache.set(key, value) + cache.get(key) + + mock_cache_instance.set.assert_called_with(key, value) + mock_cache_instance.get.assert_called_with(key, None) + + @patch("autogen.cache.cache_factory.CacheFactory.cache_factory", return_value=MagicMock()) + def test_close(self, mock_cache_factory): + mock_cache_instance = MagicMock() + mock_cache_factory.return_value = mock_cache_instance + + cache = Cache(self.config) + cache.close() + + mock_cache_instance.close.assert_called() + + +if __name__ == "__main__": + unittest.main() diff --git a/website/docs/Use-Cases/agent_chat.md b/website/docs/Use-Cases/agent_chat.md index e55c603e9a00..cb7c71b8c89d 100644 --- a/website/docs/Use-Cases/agent_chat.md +++ b/website/docs/Use-Cases/agent_chat.md @@ -285,6 +285,35 @@ By adopting the conversation-driven control with both programming language and n - LLM-based function call. In this approach, LLM decides whether or not to call a particular function depending on the conversation status in each inference call. By messaging additional agents in the called functions, the LLM can drive dynamic multi-agent conversation. A working system showcasing this type of dynamic conversation can be found in the [multi-user math problem solving scenario](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_two_users.ipynb), where a student assistant would automatically resort to an expert using function calls. 
+## LLM Caching + +### Legacy Disk Cache +By default, you can specify a `cache_seed` in your `llm_config` in order to take advantage of a local [DiskCache](https://grantjenks.com/docs/diskcache/) backed cache. This cache will be used to store the results of your LLM calls, and will be used to return results for the same input without making a call to the LLM. This is useful for saving on compute costs, and for speeding up inference. + +```python +assistant = AssistantAgent( + "coding_agent", + llm_config={ + "cache_seed": 42, + "config_list": OAI_CONFIG_LIST, + "max_tokens": 1024, + }, +) +``` + +Setting this `cache_seed` param to `None` will disable the cache. + +### Configurable Context Manager +A new configurable context manager allows you to easily turn on and off LLM cache, using either DiskCache or Redis. All LLM agents inside the context manager will use the same cache. +```python +from autogen.cache.cache import Cache + +with Cache.redis(cache_seed=42, redis_url="redis://localhost:6379/0") as cache_client: + user.initiate_chat(assistant, message=coding_task, cache_client=cache_client) + +with Cache.disk(cache_seed=42, cache_dir=".cache") as cache_client: + user.initiate_chat(assistant, message=coding_task, cache_client=cache_client) +``` ### Diverse Applications Implemented with AutoGen From d7d4dacfc8355db6a65b46479661efa9bad1759e Mon Sep 17 00:00:00 2001 From: Eric Zhu Date: Mon, 15 Jan 2024 19:17:32 -0800 Subject: [PATCH 04/14] Update agent_chat.md --- website/docs/Use-Cases/agent_chat.md | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/website/docs/Use-Cases/agent_chat.md b/website/docs/Use-Cases/agent_chat.md index cb7c71b8c89d..3fb9dbde57dd 100644 --- a/website/docs/Use-Cases/agent_chat.md +++ b/website/docs/Use-Cases/agent_chat.md @@ -287,7 +287,19 @@ By adopting the conversation-driven control with both programming language and n ## LLM Caching -### Legacy Disk Cache +### Configurable Context Manager +A new configurable context manager allows you to easily turn on and off LLM cache, using either DiskCache or Redis. All LLM agents inside the context manager will use the same cache. +```python +from autogen.cache.cache import Cache + +with Cache.redis(cache_seed=42, redis_url="redis://localhost:6379/0") as cache_client: + user.initiate_chat(assistant, message=coding_task, cache_client=cache_client) + +with Cache.disk(cache_seed=42, cache_dir=".cache") as cache_client: + user.initiate_chat(assistant, message=coding_task, cache_client=cache_client) +``` + +### Disk Cache (Legacy) By default, you can specify a `cache_seed` in your `llm_config` in order to take advantage of a local [DiskCache](https://grantjenks.com/docs/diskcache/) backed cache. This cache will be used to store the results of your LLM calls, and will be used to return results for the same input without making a call to the LLM. This is useful for saving on compute costs, and for speeding up inference. ```python @@ -303,18 +315,6 @@ assistant = AssistantAgent( Setting this `cache_seed` param to `None` will disable the cache. -### Configurable Context Manager -A new configurable context manager allows you to easily turn on and off LLM cache, using either DiskCache or Redis. All LLM agents inside the context manager will use the same cache. 
-```python -from autogen.cache.cache import Cache - -with Cache.redis(cache_seed=42, redis_url="redis://localhost:6379/0") as cache_client: - user.initiate_chat(assistant, message=coding_task, cache_client=cache_client) - -with Cache.disk(cache_seed=42, cache_dir=".cache") as cache_client: - user.initiate_chat(assistant, message=coding_task, cache_client=cache_client) -``` - ### Diverse Applications Implemented with AutoGen The figure below shows six examples of applications built using AutoGen. From 354157af2f6907668994dd344599ec594d74cd97 Mon Sep 17 00:00:00 2001 From: Vijay Ramesh Date: Wed, 17 Jan 2024 22:17:39 -0800 Subject: [PATCH 05/14] more PR feedback, remove tests from contrib and have them run with the normal jobs --- .github/workflows/build.yml | 1 + .github/workflows/contrib-openai.yml | 45 ---------------------------- .github/workflows/contrib-tests.yml | 33 -------------------- .github/workflows/openai.yml | 7 +++++ 4 files changed, 8 insertions(+), 78 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2f1af9ab744c..8f8b500cf0eb 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -49,6 +49,7 @@ jobs: if: matrix.python-version == '3.10' run: | pip install -e .[test] + pip install -e .[redis] coverage run -a -m pytest test --ignore=test/agentchat/contrib --skip-openai coverage xml - name: Upload coverage to Codecov diff --git a/.github/workflows/contrib-openai.yml b/.github/workflows/contrib-openai.yml index 7a8edd874468..90eac3488ed3 100644 --- a/.github/workflows/contrib-openai.yml +++ b/.github/workflows/contrib-openai.yml @@ -217,48 +217,3 @@ jobs: with: file: ./coverage.xml flags: unittests - CacheTest: - strategy: - matrix: - os: [ ubuntu-latest ] - python-version: [ "3.11" ] - runs-on: ${{ matrix.os }} - environment: openai1 - services: - redis: - image: redis - ports: - - 6379:6379 - options: --entrypoint redis-server - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies - run: | - docker --version - python -m pip install --upgrade pip wheel - pip install -e . 
- pip install docker - python -c "import autogen" - pip install coverage pytest-asyncio - - name: Install packages and dependencies for redis - run: | - pip install -e .[redis] - - name: Coverage - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} - AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }} - OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }} - run: | - pip install coverage>=5.3 - coverage run -a -m pytest test/agentchat/test_cache_agent.py - coverage xml - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests diff --git a/.github/workflows/contrib-tests.yml b/.github/workflows/contrib-tests.yml index 4c98975a242b..27a616b77190 100644 --- a/.github/workflows/contrib-tests.yml +++ b/.github/workflows/contrib-tests.yml @@ -207,36 +207,3 @@ jobs: with: file: ./coverage.xml flags: unittests - CacheTest: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ ubuntu-latest, macos-latest, windows-2019 ] - python-version: [ "3.8", "3.9", "3.10", "3.11" ] - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies for all tests - run: | - python -m pip install --upgrade pip wheel - pip install -e .[test] - pip install -e .[redis] - - name: Test Cache - run: | - pytest test/cache/test_cache.py test/cache/test_redis_cache.py test/cache/test_disk_cache.py --skip-openai - - name: Coverage - if: matrix.python-version == '3.10' - run: | - pip install coverage>=5.3 - coverage run -a -m pytest test/cache/test_cache.py test/cache/test_redis_cache.py test/cache/test_disk_cache.py --skip-openai - coverage xml - - name: Upload coverage to Codecov - if: matrix.python-version == '3.10' - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests diff --git a/.github/workflows/openai.yml b/.github/workflows/openai.yml index 83679b58e388..e86c80d1b439 100644 --- a/.github/workflows/openai.yml +++ b/.github/workflows/openai.yml @@ -21,6 +21,12 @@ jobs: python-version: ["3.9", "3.10", "3.11"] runs-on: ${{ matrix.os }} environment: openai1 + services: + redis: + image: redis + ports: + - 6379:6379 + options: --entrypoint redis-server steps: # checkout to pr branch - name: Checkout @@ -42,6 +48,7 @@ jobs: if: matrix.python-version == '3.9' run: | pip install docker + pip install -e .[redis] - name: Coverage if: matrix.python-version == '3.9' env: From 6a558acb820727a076926076ddb1762c3c397bfb Mon Sep 17 00:00:00 2001 From: Eric Zhu Date: Thu, 18 Jan 2024 00:13:50 -0800 Subject: [PATCH 06/14] doc --- website/docs/Use-Cases/agent_chat.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/docs/Use-Cases/agent_chat.md b/website/docs/Use-Cases/agent_chat.md index 3fb9dbde57dd..f8ff4353a724 100644 --- a/website/docs/Use-Cases/agent_chat.md +++ b/website/docs/Use-Cases/agent_chat.md @@ -285,9 +285,9 @@ By adopting the conversation-driven control with both programming language and n - LLM-based function call. In this approach, LLM decides whether or not to call a particular function depending on the conversation status in each inference call. By messaging additional agents in the called functions, the LLM can drive dynamic multi-agent conversation. 
A working system showcasing this type of dynamic conversation can be found in the [multi-user math problem solving scenario](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_two_users.ipynb), where a student assistant would automatically resort to an expert using function calls. -## LLM Caching +### LLM Caching -### Configurable Context Manager +#### Configurable Context Manager A new configurable context manager allows you to easily turn on and off LLM cache, using either DiskCache or Redis. All LLM agents inside the context manager will use the same cache. ```python from autogen.cache.cache import Cache @@ -299,7 +299,7 @@ with Cache.disk(cache_seed=42, cache_dir=".cache") as cache_client: user.initiate_chat(assistant, message=coding_task, cache_client=cache_client) ``` -### Disk Cache (Legacy) +#### Disk Cache (Legacy) By default, you can specify a `cache_seed` in your `llm_config` in order to take advantage of a local [DiskCache](https://grantjenks.com/docs/diskcache/) backed cache. This cache will be used to store the results of your LLM calls, and will be used to return results for the same input without making a call to the LLM. This is useful for saving on compute costs, and for speeding up inference. ```python From b0a1d541a24dc05c15399c32175fd239a7e1adc5 Mon Sep 17 00:00:00 2001 From: Eric Zhu Date: Thu, 18 Jan 2024 00:20:55 -0800 Subject: [PATCH 07/14] updated --- website/docs/Installation.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/website/docs/Installation.md b/website/docs/Installation.md index a04da82767de..b3b66917d4ba 100644 --- a/website/docs/Installation.md +++ b/website/docs/Installation.md @@ -213,6 +213,17 @@ Otherwise, reply CONTINUE, or the reason why the task is not solved yet.""" ) ``` +- #### LLM Caching + +To use LLM caching with Redis, you need to install the Python package with +the option `redis`: + +```bash +pip install "pyautogen[redis]" +``` + +See [LLM Caching](Use-Cases/agent_chat.md#llm-caching) for details. + - #### blendsearch `pyautogen<0.2` offers a cost-effective hyperparameter optimization technique [EcoOptiGen](https://arxiv.org/abs/2303.04673) for tuning Large Language Models. Please install with the [blendsearch] option to use it. From 8df3eba9a078a483dd342fe8b02b8f3d8365d602 Mon Sep 17 00:00:00 2001 From: Eric Zhu Date: Thu, 18 Jan 2024 09:12:30 -0800 Subject: [PATCH 08/14] Update website/docs/Use-Cases/agent_chat.md Co-authored-by: Chi Wang --- website/docs/Use-Cases/agent_chat.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/Use-Cases/agent_chat.md b/website/docs/Use-Cases/agent_chat.md index f8ff4353a724..2a389f525559 100644 --- a/website/docs/Use-Cases/agent_chat.md +++ b/website/docs/Use-Cases/agent_chat.md @@ -288,7 +288,7 @@ By adopting the conversation-driven control with both programming language and n ### LLM Caching #### Configurable Context Manager -A new configurable context manager allows you to easily turn on and off LLM cache, using either DiskCache or Redis. All LLM agents inside the context manager will use the same cache. +(>=0.2.8) A new configurable context manager allows you to easily turn on and off LLM cache, using either DiskCache or Redis. All LLM agents inside the context manager will use the same cache. 
```python from autogen.cache.cache import Cache From db46d5f96ea72972c0bb4e5edc5c6554f73cc547 Mon Sep 17 00:00:00 2001 From: Eric Zhu Date: Thu, 18 Jan 2024 09:32:03 -0800 Subject: [PATCH 09/14] update docs --- website/docs/Use-Cases/agent_chat.md | 20 ++---------- website/docs/Use-Cases/enhanced_inference.md | 34 +++++++++++++++++--- 2 files changed, 32 insertions(+), 22 deletions(-) diff --git a/website/docs/Use-Cases/agent_chat.md b/website/docs/Use-Cases/agent_chat.md index 2a389f525559..7006c4ac10fe 100644 --- a/website/docs/Use-Cases/agent_chat.md +++ b/website/docs/Use-Cases/agent_chat.md @@ -287,8 +287,7 @@ By adopting the conversation-driven control with both programming language and n ### LLM Caching -#### Configurable Context Manager -(>=0.2.8) A new configurable context manager allows you to easily turn on and off LLM cache, using either DiskCache or Redis. All LLM agents inside the context manager will use the same cache. +Since version 0.2.8, a configurable context manager allows you to easily configure LLM cache, using either DiskCache or Redis. All agents inside the context manager will use the same cache. ```python from autogen.cache.cache import Cache @@ -299,21 +298,8 @@ with Cache.disk(cache_seed=42, cache_dir=".cache") as cache_client: user.initiate_chat(assistant, message=coding_task, cache_client=cache_client) ``` -#### Disk Cache (Legacy) -By default, you can specify a `cache_seed` in your `llm_config` in order to take advantage of a local [DiskCache](https://grantjenks.com/docs/diskcache/) backed cache. This cache will be used to store the results of your LLM calls, and will be used to return results for the same input without making a call to the LLM. This is useful for saving on compute costs, and for speeding up inference. - -```python -assistant = AssistantAgent( - "coding_agent", - llm_config={ - "cache_seed": 42, - "config_list": OAI_CONFIG_LIST, - "max_tokens": 1024, - }, -) -``` - -Setting this `cache_seed` param to `None` will disable the cache. +DiskCache is on by default with `cache_seed` set to 41. +See [Caching](./enhanced_inference.md#caching) for more details. ### Diverse Applications Implemented with AutoGen diff --git a/website/docs/Use-Cases/enhanced_inference.md b/website/docs/Use-Cases/enhanced_inference.md index 529159a1f22a..7d38139ff967 100644 --- a/website/docs/Use-Cases/enhanced_inference.md +++ b/website/docs/Use-Cases/enhanced_inference.md @@ -168,20 +168,44 @@ Total cost: 0.00027 ## Caching -API call results are cached locally and reused when the same request is issued. This is useful when repeating or continuing experiments for reproducibility and cost saving. It still allows controlled randomness by setting the "cache_seed" specified in `OpenAIWrapper.create()` or the constructor of `OpenAIWrapper`. +API call results are cached locally and reused when the same request is issued. This is useful when repeating or continuing experiments for reproducibility and cost saving. + +Starting version 0.2.8, a configurable context manager allows you to easily configure +the cache, using either DiskCache or Redis. +All LLM agents inside the context manager will use the same cache. ```python -client = OpenAIWrapper(cache_seed=...) -client.create(...) 
+from autogen.cache.cache import Cache + +with Cache.redis(cache_seed=42, redis_url="redis://localhost:6379/0") as cache_client: + user.initiate_chat(assistant, message=coding_task, cache_client=cache_client) + +with Cache.disk(cache_seed=42, cache_dir=".cache") as cache_client: + user.initiate_chat(assistant, message=coding_task, cache_client=cache_client) ``` +You can control the randomness by setting the `cache_seed` parameter. + +### Turnning off cache +For backward compatibility, DiskCache is always enabled by default +with `cache_seed` set to 41. To fully disable it, set `cache_seed` to None. + ```python +# You turn off cache directly through the agent, +assistant = AssistantAgent( + "coding_agent", + llm_config={ + "cache_seed": None, + "config_list": OAI_CONFIG_LIST, + "max_tokens": 1024, + }, +) + +# or in the OpenAIWrapper create method. client = OpenAIWrapper() client.create(cache_seed=..., ...) ``` -Caching is enabled by default with cache_seed 41. To disable it please set `cache_seed` to None. - _NOTE_. openai v1.1 introduces a new param `seed`. The difference between autogen's `cache_seed` and openai's `seed` is that: * autogen uses local disk cache to guarantee the exactly same output is produced for the same input and when cache is hit, no openai api call will be made. * openai's `seed` is a best-effort deterministic sampling with no guarantee of determinism. When using openai's `seed` with `cache_seed` set to None, even for the same input, an openai api call will be made and there is no guarantee for getting exactly the same output. From f9df0d711e818de315ee5c53fbe57e6e084e26e6 Mon Sep 17 00:00:00 2001 From: Eric Zhu Date: Thu, 18 Jan 2024 23:41:37 -0800 Subject: [PATCH 10/14] update docs; let openaiwrapper to use cache object --- autogen/agentchat/conversable_agent.py | 19 ++- autogen/agentchat/groupchat.py | 4 +- autogen/oai/client.py | 28 ++-- test/agentchat/test_cache_agent.py | 7 +- test/oai/test_client.py | 140 ++++++++++++++++++- website/docs/Use-Cases/agent_chat.md | 24 +++- website/docs/Use-Cases/enhanced_inference.md | 38 ++--- 7 files changed, 212 insertions(+), 48 deletions(-) diff --git a/autogen/agentchat/conversable_agent.py b/autogen/agentchat/conversable_agent.py index 60a1fe6b5355..488566c865f1 100644 --- a/autogen/agentchat/conversable_agent.py +++ b/autogen/agentchat/conversable_agent.py @@ -137,7 +137,7 @@ def __init__( self.client = OpenAIWrapper(**self.llm_config) # initialize standalone cache client - self.cache_client = None + self.client_cache = None self._code_execution_config: Union[Dict, Literal[False]] = ( {} if code_execution_config is None else code_execution_config @@ -669,7 +669,7 @@ def initiate_chat( recipient: "ConversableAgent", clear_history: Optional[bool] = True, silent: Optional[bool] = False, - cache_client: Optional[Cache] = None, + cache: Optional[Cache] = None, **context, ): """Initiate a chat with the recipient agent. @@ -682,7 +682,7 @@ def initiate_chat( recipient: the recipient agent. clear_history (bool): whether to clear the chat history with the agent. silent (bool or None): (Experimental) whether to print the messages for this conversation. - cache_client (Cache or None): the cache client to be used for this conversation. + cache (Cache or None): the cache client to be used for this conversation. **context: any context information. "message" needs to be provided if the `generate_init_message` method is not overridden. 
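# A minimal caller-side sketch of the `cache` argument introduced above for
# `initiate_chat` / `a_initiate_chat`. This is not part of the patch itself: the
# `assistant` and `user` agents are assumed to already exist, and the task string
# is borrowed from the tests in this PR.
import asyncio

from autogen.cache.cache import Cache


async def chat_with_shared_cache():
    # Both agents share this cache object for the duration of the chat, so a
    # repeated run with the same seed is served from disk instead of a new LLM call.
    with Cache.disk(cache_seed=42, cache_path_root=".cache") as cache:
        await user.a_initiate_chat(
            assistant, message="Print hello world to a file called hello.txt", cache=cache
        )


asyncio.run(chat_with_shared_cache())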
@@ -692,8 +692,8 @@ def initiate_chat( for agent in [self, recipient]: agent._raise_exception_on_async_reply_functions() self._prepare_chat(recipient, clear_history) - self.cache_client = cache_client - recipient.cache_client = cache_client + self.client_cache = cache + recipient.client_cache = cache self.send(self.generate_init_message(**context), recipient, silent=silent) async def a_initiate_chat( @@ -701,6 +701,7 @@ async def a_initiate_chat( recipient: "ConversableAgent", clear_history: Optional[bool] = True, silent: Optional[bool] = False, + cache: Optional[Cache] = None, **context, ): """(async) Initiate a chat with the recipient agent. @@ -713,10 +714,13 @@ async def a_initiate_chat( recipient: the recipient agent. clear_history (bool): whether to clear the chat history with the agent. silent (bool or None): (Experimental) whether to print the messages for this conversation. + cache (Cache or None): the cache client to be used for this conversation. **context: any context information. "message" needs to be provided if the `generate_init_message` method is not overridden. """ self._prepare_chat(recipient, clear_history) + self.client_cache = cache + recipient.client_cache = cache await self.a_send(self.generate_init_message(**context), recipient, silent=silent) def reset(self): @@ -783,9 +787,10 @@ def generate_oai_reply( all_messages.append(message) # TODO: #1143 handle token limit exceeded error - client.cache = self.cache_client response = client.create( - context=messages[-1].pop("context", None), messages=self._oai_system_message + all_messages + context=messages[-1].pop("context", None), + messages=self._oai_system_message + all_messages, + cache=self.client_cache, ) extracted_response = client.extract_text_or_completion_object(response)[0] diff --git a/autogen/agentchat/groupchat.py b/autogen/agentchat/groupchat.py index 2006b2169e64..9e5548b16652 100644 --- a/autogen/agentchat/groupchat.py +++ b/autogen/agentchat/groupchat.py @@ -356,9 +356,9 @@ def run_chat( message = messages[-1] speaker = sender groupchat = config - if self.cache_client is not None: + if self.client_cache is not None: for a in groupchat.agents: - a.cache_client = self.cache_client + a.client_cache = self.client_cache for i in range(groupchat.max_round): groupchat.append(message, speaker) if self._is_termination_msg(message): diff --git a/autogen/oai/client.py b/autogen/oai/client.py index ab4c04816acf..7fce740680ce 100644 --- a/autogen/oai/client.py +++ b/autogen/oai/client.py @@ -48,11 +48,15 @@ _ch.setFormatter(logger_formatter) logger.addHandler(_ch) +LEGACY_DEFAULT_CACHE_SEED = 41 +LEGACY_CACHE_DIR = ".cache" + class OpenAIWrapper: """A wrapper class for openai client.""" extra_kwargs = { + "cache", "cache_seed", "filter_func", "allow_format_str_template", @@ -67,7 +71,6 @@ class OpenAIWrapper: openai_kwargs = openai_kwargs | aopenai_kwargs total_usage_summary: Optional[Dict[str, Any]] = None actual_usage_summary: Optional[Dict[str, Any]] = None - cache: Optional[Cache] = None def __init__(self, *, config_list: Optional[List[Dict[str, Any]]] = None, **base_config: Any): """ @@ -206,10 +209,14 @@ def create(self, **config: Any) -> ChatCompletion: The actual prompt will be: "Complete the following sentence: Today I feel". More examples can be found at [templating](/docs/Use-Cases/enhanced_inference#templating). - - `cache_seed` (int | None) for the cache. Default to 41. + - cache (Cache | None): A Cache object to use for response cache. Default to None. 
+ Note that the cache argument overrides the legacy cache_seed argument: if this argument is provided, + then the cache_seed argument is ignored. If this argument is not provided or None, + then the cache_seed argument is used. + - (Legacy) cache_seed (int | None) for using the DiskCache. Default to 41. An integer cache_seed is useful when implementing "controlled randomness" for the completion. None for no caching. - This is a legacy parameter. See [cache](/docs/Use-Cases/agent_chat#llmcaching) for more details. + Note: this is a legacy argument. It is only used when the cache argument is not provided. - filter_func (Callable | None): A function that takes in the context and the response and returns a boolean to indicate whether the response is valid. E.g., @@ -237,17 +244,18 @@ def yes_or_no_filter(context, response): # construct the create params params = self._construct_create_params(create_config, extra_kwargs) # get the cache_seed, filter_func and context - cache_seed = extra_kwargs.get("cache_seed", 41) + cache_seed = extra_kwargs.get("cache_seed", LEGACY_DEFAULT_CACHE_SEED) + cache = extra_kwargs.get("cache") filter_func = extra_kwargs.get("filter_func") context = extra_kwargs.get("context") cache_client = None - if cache_seed is not None: - # Legacy cache behavior, if cache_seed is in the llm_config, use disk cache - cache_client = Cache.disk(cache_seed, ".cache") - elif self.cache is not None: - # Otherwise if they have passed in a cache, use that - cache_client = self.cache + if cache is not None: + # Use the cache object if provided. + cache_client = cache + elif cache_seed is not None: + # Legacy cache behavior, if cache_seed is given, use DiskCache. + cache_client = Cache.disk(cache_seed, LEGACY_CACHE_DIR) if cache_client is not None: with cache_client as cache: diff --git a/test/agentchat/test_cache_agent.py b/test/agentchat/test_cache_agent.py index 218e275e272c..deb5e377923c 100644 --- a/test/agentchat/test_cache_agent.py +++ b/test/agentchat/test_cache_agent.py @@ -152,13 +152,13 @@ def run_conversation(cache_seed, human_input_mode="NEVER", max_consecutive_auto_ If "Thank you" or "You\'re welcome" are said in the conversation, then say TERMINATE and that is your last message.""", ) - user.initiate_chat(assistant, message="TERMINATE", cache_client=cache) + user.initiate_chat(assistant, message="TERMINATE", cache=cache) # should terminate without sending any message assert assistant.last_message()["content"] == assistant.last_message(user)["content"] == "TERMINATE" coding_task = "Print hello world to a file called hello.txt" # track how long this takes - user.initiate_chat(assistant, message=coding_task, cache_client=cache) + user.initiate_chat(assistant, message=coding_task, cache=cache) return user.chat_messages[list(user.chat_messages.keys())[-0]] @@ -207,7 +207,6 @@ def run_groupchat_conversation(cache, human_input_mode="NEVER", max_consecutive_ "use_docker": "python:3", "timeout": 60, }, - llm_config=llm_config, system_message="""Is code provided but not enclosed in ``` blocks? If so, remind that code blocks need to be enclosed in ``` blocks. Reply TERMINATE to end the conversation if the task is finished. Don't say appreciation. 
@@ -224,5 +223,5 @@ def run_groupchat_conversation(cache, human_input_mode="NEVER", max_consecutive_ coding_task = "Print hello world to a file called hello.txt" - user.initiate_chat(manager, message=coding_task, cache_client=cache) + user.initiate_chat(manager, message=coding_task, cache=cache) return user.chat_messages[list(user.chat_messages.keys())[-0]] diff --git a/test/oai/test_client.py b/test/oai/test_client.py index 7f561187d491..b8c93e13d707 100644 --- a/test/oai/test_client.py +++ b/test/oai/test_client.py @@ -1,7 +1,11 @@ +import shutil +import time import pytest from autogen import OpenAIWrapper, config_list_from_json, config_list_openai_aoai +from autogen.oai.client import LEGACY_CACHE_DIR, LEGACY_DEFAULT_CACHE_SEED import sys import os +from autogen.cache.cache import Cache sys.path.append(os.path.join(os.path.dirname(__file__), "..")) from conftest import skip_openai # noqa: E402 @@ -151,10 +155,144 @@ def test_usage_summary(): assert client.actual_usage_summary is None, "No actual cost should be recorded" +@pytest.mark.skipif(skip, reason="openai>=1 not installed") +def test_legacy_cache(): + config_list = config_list_from_json( + env_or_file=OAI_CONFIG_LIST, + file_location=KEY_LOC, + filter_dict={"model": ["gpt-3.5-turbo", "gpt-35-turbo"]}, + ) + + # Clear cache. + if os.path.exists(LEGACY_CACHE_DIR): + shutil.rmtree(LEGACY_CACHE_DIR) + + # Test default cache seed. + client = OpenAIWrapper(config_list=config_list) + start_time = time.time() + cold_cache_response = client.create(messages=[{"role": "user", "content": "random()"}]) + end_time = time.time() + duration_with_cold_cache = end_time - start_time + + start_time = time.time() + warm_cache_response = client.create(messages=[{"role": "user", "content": "random()"}]) + end_time = time.time() + duration_with_warm_cache = end_time - start_time + assert cold_cache_response == warm_cache_response + assert duration_with_warm_cache < duration_with_cold_cache + assert os.path.exists(os.path.join(LEGACY_CACHE_DIR, str(LEGACY_DEFAULT_CACHE_SEED))) + + # Test with cache seed set through constructor + client = OpenAIWrapper(config_list=config_list, cache_seed=13) + start_time = time.time() + cold_cache_response = client.create(messages=[{"role": "user", "content": "random()"}]) + end_time = time.time() + duration_with_cold_cache = end_time - start_time + + start_time = time.time() + warm_cache_response = client.create(messages=[{"role": "user", "content": "random()"}]) + end_time = time.time() + duration_with_warm_cache = end_time - start_time + assert cold_cache_response == warm_cache_response + assert duration_with_warm_cache < duration_with_cold_cache + assert os.path.exists(os.path.join(LEGACY_CACHE_DIR, str(13))) + + # Test with cache seed set through create method + client = OpenAIWrapper(config_list=config_list) + start_time = time.time() + cold_cache_response = client.create(messages=[{"role": "user", "content": "random()"}], cache_seed=17) + end_time = time.time() + duration_with_cold_cache = end_time - start_time + + start_time = time.time() + warm_cache_response = client.create(messages=[{"role": "user", "content": "random()"}], cache_seed=17) + end_time = time.time() + duration_with_warm_cache = end_time - start_time + assert cold_cache_response == warm_cache_response + assert duration_with_warm_cache < duration_with_cold_cache + assert os.path.exists(os.path.join(LEGACY_CACHE_DIR, str(17))) + + # Test using a different cache seed through create method. 
+ start_time = time.time() + cold_cache_response = client.create(messages=[{"role": "user", "content": "random()"}], cache_seed=21) + end_time = time.time() + duration_with_cold_cache = end_time - start_time + assert duration_with_warm_cache < duration_with_cold_cache + assert os.path.exists(os.path.join(LEGACY_CACHE_DIR, str(21))) + + +@pytest.mark.skipif(skip, reason="openai>=1 not installed") +def test_cache(): + config_list = config_list_from_json( + env_or_file=OAI_CONFIG_LIST, + file_location=KEY_LOC, + filter_dict={"model": ["gpt-3.5-turbo", "gpt-35-turbo"]}, + ) + + # Clear cache. + if os.path.exists(LEGACY_CACHE_DIR): + shutil.rmtree(LEGACY_CACHE_DIR) + cache_dir = ".cache_test" + assert cache_dir != LEGACY_CACHE_DIR + if os.path.exists(cache_dir): + shutil.rmtree(cache_dir) + + # Test cache set through constructor. + with Cache.disk(cache_seed=49, cache_path_root=cache_dir) as cache: + client = OpenAIWrapper(config_list=config_list, cache=cache) + start_time = time.time() + cold_cache_response = client.create(messages=[{"role": "user", "content": "random()"}]) + end_time = time.time() + duration_with_cold_cache = end_time - start_time + + start_time = time.time() + warm_cache_response = client.create(messages=[{"role": "user", "content": "random()"}]) + end_time = time.time() + duration_with_warm_cache = end_time - start_time + assert cold_cache_response == warm_cache_response + assert duration_with_warm_cache < duration_with_cold_cache + assert os.path.exists(os.path.join(cache_dir, str(49))) + # Test legacy cache is not used. + assert not os.path.exists(os.path.join(LEGACY_CACHE_DIR, str(49))) + assert not os.path.exists(os.path.join(cache_dir, str(LEGACY_DEFAULT_CACHE_SEED))) + + # Test cache set through method. + client = OpenAIWrapper(config_list=config_list) + with Cache.disk(cache_seed=312, cache_path_root=cache_dir) as cache: + start_time = time.time() + cold_cache_response = client.create(messages=[{"role": "user", "content": "random()"}], cache=cache) + end_time = time.time() + duration_with_cold_cache = end_time - start_time + + start_time = time.time() + warm_cache_response = client.create(messages=[{"role": "user", "content": "random()"}], cache=cache) + end_time = time.time() + duration_with_warm_cache = end_time - start_time + assert cold_cache_response == warm_cache_response + assert duration_with_warm_cache < duration_with_cold_cache + assert os.path.exists(os.path.join(cache_dir, str(312))) + # Test legacy cache is not used. + assert not os.path.exists(os.path.join(LEGACY_CACHE_DIR, str(312))) + assert not os.path.exists(os.path.join(cache_dir, str(LEGACY_DEFAULT_CACHE_SEED))) + + # Test different cache seed. + with Cache.disk(cache_seed=123, cache_path_root=cache_dir) as cache: + start_time = time.time() + cold_cache_response = client.create(messages=[{"role": "user", "content": "random()"}], cache=cache) + end_time = time.time() + duration_with_cold_cache = end_time - start_time + assert duration_with_warm_cache < duration_with_cold_cache + # Test legacy cache is not used. 
+ assert not os.path.exists(os.path.join(LEGACY_CACHE_DIR, str(123))) + assert not os.path.exists(os.path.join(cache_dir, str(LEGACY_DEFAULT_CACHE_SEED))) + + if __name__ == "__main__": # test_aoai_chat_completion() # test_oai_tool_calling_extraction() # test_chat_completion() - test_completion() + # test_completion() # # test_cost() # test_usage_summary() + test_legacy_cache() + test_cache() diff --git a/website/docs/Use-Cases/agent_chat.md b/website/docs/Use-Cases/agent_chat.md index 7006c4ac10fe..06e439d8a860 100644 --- a/website/docs/Use-Cases/agent_chat.md +++ b/website/docs/Use-Cases/agent_chat.md @@ -288,18 +288,30 @@ By adopting the conversation-driven control with both programming language and n ### LLM Caching Since version 0.2.8, a configurable context manager allows you to easily configure LLM cache, using either DiskCache or Redis. All agents inside the context manager will use the same cache. + ```python from autogen.cache.cache import Cache -with Cache.redis(cache_seed=42, redis_url="redis://localhost:6379/0") as cache_client: - user.initiate_chat(assistant, message=coding_task, cache_client=cache_client) +with Cache.redis(cache_seed=42, redis_url="redis://localhost:6379/0") as cache: + user.initiate_chat(assistant, message=coding_task, cache_client=cache) -with Cache.disk(cache_seed=42, cache_dir=".cache") as cache_client: - user.initiate_chat(assistant, message=coding_task, cache_client=cache_client) +with Cache.disk(cache_seed=42, cache_dir=".cache") as cache: + user.initiate_chat(assistant, message=coding_task, cache_client=cache) ``` -DiskCache is on by default with `cache_seed` set to 41. -See [Caching](./enhanced_inference.md#caching) for more details. +For backward compatibility, DiskCache is on by default with `cache_seed` set to 41. +To disable caching completely, set `cache_seed` to `None` in the `llm_config` of the agent. + +```python +assistant = AssistantAgent( + "coding_agent", + llm_config={ + "cache_seed": None, + "config_list": OAI_CONFIG_LIST, + "max_tokens": 1024, + }, +) +``` ### Diverse Applications Implemented with AutoGen diff --git a/website/docs/Use-Cases/enhanced_inference.md b/website/docs/Use-Cases/enhanced_inference.md index 7d38139ff967..000cb61c2169 100644 --- a/website/docs/Use-Cases/enhanced_inference.md +++ b/website/docs/Use-Cases/enhanced_inference.md @@ -172,38 +172,40 @@ API call results are cached locally and reused when the same request is issued. Starting version 0.2.8, a configurable context manager allows you to easily configure the cache, using either DiskCache or Redis. -All LLM agents inside the context manager will use the same cache. +All `OpenAIWrapper` created inside the context manager can use the same cache through the constructor.. ```python from autogen.cache.cache import Cache -with Cache.redis(cache_seed=42, redis_url="redis://localhost:6379/0") as cache_client: - user.initiate_chat(assistant, message=coding_task, cache_client=cache_client) +with Cache.redis(cache_seed=42, redis_url="redis://localhost:6379/0") as cache: + client = OpenAIWrapper(..., cache=cache) + client.create(...) -with Cache.disk(cache_seed=42, cache_dir=".cache") as cache_client: - user.initiate_chat(assistant, message=coding_task, cache_client=cache_client) +with Cache.disk(cache_seed=42, cache_dir=".cache") as cache: + client = OpenAIWrapper(..., cache=cache) + client.create(...) +``` + +You can also set a cache directly in the `create()` method. 
+ +```python +client = OpenAIWrapper() +with Cache.disk(cache_seed=42, cache_dir=".cache") as cache: + client.create(..., cache=cache) ``` You can control the randomness by setting the `cache_seed` parameter. ### Turnning off cache + For backward compatibility, DiskCache is always enabled by default with `cache_seed` set to 41. To fully disable it, set `cache_seed` to None. ```python -# You turn off cache directly through the agent, -assistant = AssistantAgent( - "coding_agent", - llm_config={ - "cache_seed": None, - "config_list": OAI_CONFIG_LIST, - "max_tokens": 1024, - }, -) - -# or in the OpenAIWrapper create method. -client = OpenAIWrapper() -client.create(cache_seed=..., ...) +# Turn off cache in constructor, +client = OpenAIWrapper(..., cache_seed=None) +# or directly in create(). +client.create(..., cache_seed=None) ``` _NOTE_. openai v1.1 introduces a new param `seed`. The difference between autogen's `cache_seed` and openai's `seed` is that: From 08ddde41a35b2f5f3ca49a362260fa22e724584d Mon Sep 17 00:00:00 2001 From: Eric Zhu Date: Thu, 18 Jan 2024 23:46:50 -0800 Subject: [PATCH 11/14] typo --- autogen/agentchat/conversable_agent.py | 2 +- website/docs/Use-Cases/agent_chat.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/autogen/agentchat/conversable_agent.py b/autogen/agentchat/conversable_agent.py index 1ad796f415d4..b47554144704 100644 --- a/autogen/agentchat/conversable_agent.py +++ b/autogen/agentchat/conversable_agent.py @@ -136,7 +136,7 @@ def __init__( self.llm_config.update(llm_config) self.client = OpenAIWrapper(**self.llm_config) - # initialize standalone cache client + # Initialize standalone client cache object. self.client_cache = None self._code_execution_config: Union[Dict, Literal[False]] = ( diff --git a/website/docs/Use-Cases/agent_chat.md b/website/docs/Use-Cases/agent_chat.md index 06e439d8a860..98c103aa466e 100644 --- a/website/docs/Use-Cases/agent_chat.md +++ b/website/docs/Use-Cases/agent_chat.md @@ -293,10 +293,10 @@ Since version 0.2.8, a configurable context manager allows you to easily configu from autogen.cache.cache import Cache with Cache.redis(cache_seed=42, redis_url="redis://localhost:6379/0") as cache: - user.initiate_chat(assistant, message=coding_task, cache_client=cache) + user.initiate_chat(assistant, message=coding_task, cache=cache) with Cache.disk(cache_seed=42, cache_dir=".cache") as cache: - user.initiate_chat(assistant, message=coding_task, cache_client=cache) + user.initiate_chat(assistant, message=coding_task, cache=cache) ``` For backward compatibility, DiskCache is on by default with `cache_seed` set to 41. From ba6f80462cde4b08feb0a7e984464f1343aace8c Mon Sep 17 00:00:00 2001 From: Eric Zhu Date: Fri, 19 Jan 2024 09:45:15 -0800 Subject: [PATCH 12/14] Update website/docs/Use-Cases/enhanced_inference.md Co-authored-by: Chi Wang --- website/docs/Use-Cases/enhanced_inference.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/Use-Cases/enhanced_inference.md b/website/docs/Use-Cases/enhanced_inference.md index 000cb61c2169..17f871e461fe 100644 --- a/website/docs/Use-Cases/enhanced_inference.md +++ b/website/docs/Use-Cases/enhanced_inference.md @@ -172,7 +172,7 @@ API call results are cached locally and reused when the same request is issued. Starting version 0.2.8, a configurable context manager allows you to easily configure the cache, using either DiskCache or Redis. -All `OpenAIWrapper` created inside the context manager can use the same cache through the constructor.. 
+All `OpenAIWrapper` created inside the context manager can use the same cache through the constructor. ```python from autogen.cache.cache import Cache From 49bde528f0fe5ce422004bc671b68da3aa5d26a5 Mon Sep 17 00:00:00 2001 From: Vijay Ramesh Date: Fri, 19 Jan 2024 19:49:22 -0800 Subject: [PATCH 13/14] save previous client cache and reset it after send/a_send --- autogen/agentchat/conversable_agent.py | 15 +++++++++++---- autogen/agentchat/groupchat.py | 7 ++++++- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/autogen/agentchat/conversable_agent.py b/autogen/agentchat/conversable_agent.py index b47554144704..6a3d7f4228bf 100644 --- a/autogen/agentchat/conversable_agent.py +++ b/autogen/agentchat/conversable_agent.py @@ -692,10 +692,13 @@ def initiate_chat( """ for agent in [self, recipient]: agent._raise_exception_on_async_reply_functions() + agent.previous_cache = agent.client_cache + agent.client_cache = cache self._prepare_chat(recipient, clear_history) - self.client_cache = cache - recipient.client_cache = cache self.send(self.generate_init_message(**context), recipient, silent=silent) + for agent in [self, recipient]: + agent.client_cache = agent.previous_cache + agent.previous_cache = None async def a_initiate_chat( self, @@ -721,9 +724,13 @@ async def a_initiate_chat( Otherwise, input() will be called to get the initial message. """ self._prepare_chat(recipient, clear_history) - self.client_cache = cache - recipient.client_cache = cache + for agent in [self, recipient]: + agent.previous_cache = agent.client_cache + agent.client_cache = cache await self.a_send(await self.a_generate_init_message(**context), recipient, silent=silent) + for agent in [self, recipient]: + agent.client_cache = agent.previous_cache + agent.previous_cache = None def reset(self): """Reset the agent.""" diff --git a/autogen/agentchat/groupchat.py b/autogen/agentchat/groupchat.py index 9e5548b16652..9024eac2dc73 100644 --- a/autogen/agentchat/groupchat.py +++ b/autogen/agentchat/groupchat.py @@ -349,7 +349,7 @@ def run_chat( messages: Optional[List[Dict]] = None, sender: Optional[Agent] = None, config: Optional[GroupChat] = None, - ) -> Union[str, Dict, None]: + ) -> Tuple[bool, Optional[str]]: """Run a group chat.""" if messages is None: messages = self._oai_messages[sender] @@ -358,6 +358,7 @@ def run_chat( groupchat = config if self.client_cache is not None: for a in groupchat.agents: + a.previous_cache = a.client_cache a.client_cache = self.client_cache for i in range(groupchat.max_round): groupchat.append(message, speaker) @@ -392,6 +393,10 @@ def run_chat( message = self.last_message(speaker) if i == groupchat.max_round - 1: groupchat.append(message, speaker) + if self.client_cache is not None: + for a in groupchat.agents: + a.client_cache = a.previous_cache + a.previous_cache = None return True, None async def a_run_chat( From 7e088f9f1ed373e3bd37f7576d516a999601e38c Mon Sep 17 00:00:00 2001 From: Eric Zhu Date: Fri, 19 Jan 2024 21:11:37 -0800 Subject: [PATCH 14/14] a_run_chat --- autogen/agentchat/groupchat.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/autogen/agentchat/groupchat.py b/autogen/agentchat/groupchat.py index 9024eac2dc73..4a18744000ee 100644 --- a/autogen/agentchat/groupchat.py +++ b/autogen/agentchat/groupchat.py @@ -411,6 +411,10 @@ async def a_run_chat( message = messages[-1] speaker = sender groupchat = config + if self.client_cache is not None: + for a in groupchat.agents: + a.previous_cache = a.client_cache + a.client_cache = self.client_cache for i in 
range(groupchat.max_round): groupchat.append(message, speaker) @@ -444,6 +448,10 @@ async def a_run_chat( # The speaker sends the message without requesting a reply await speaker.a_send(reply, self, request_reply=False) message = self.last_message(speaker) + if self.client_cache is not None: + for a in groupchat.agents: + a.client_cache = a.previous_cache + a.previous_cache = None return True, None def _raise_exception_on_async_reply_functions(self) -> None: