Upgrade docusaurus #1101

Merged: 38 commits, Jan 13, 2024
Changes from 20 commits

Commits (38)
4cc5f1c  Preparing content for MDX v3 (olaoluwasalami, Dec 29, 2023)
977fb4f  upgrade dcusarus to v3 (olaoluwasalami, Dec 30, 2023)
6ae35c1  upgrade to v3 (olaoluwasalami, Jan 1, 2024)
81f4cfb  Merge branch 'main' of https://github.com/olaoluwasalami/autogen into… (olaoluwasalami, Jan 1, 2024)
990a8a0  merge main to branch (olaoluwasalami, Jan 1, 2024)
eda02db  space (olaoluwasalami, Jan 1, 2024)
e77da32  change node version to 18 (olaoluwasalami, Jan 2, 2024)
b777339  Merge remote-tracking branch 'origin/main' into upgrade-docusarus (olaoluwasalami, Jan 2, 2024)
7ecdbe8  merge main (olaoluwasalami, Jan 2, 2024)
ee5792f  change setup-node version (olaoluwasalami, Jan 3, 2024)
c4265c3  bug fix: added escape when needed (davorrunje, Jan 3, 2024)
6153ac2  added escape characters in docstrings (davorrunje, Jan 3, 2024)
e95c0ab  Merge branch 'main' into upgrade-docusarus (davorrunje, Jan 3, 2024)
9cb2d91  upgraded docusaurus to 3.0.1 (davorrunje, Jan 3, 2024)
8aba611  polishing (davorrunje, Jan 3, 2024)
0de32d4  restored commented out link (davorrunje, Jan 3, 2024)
cea0ab5  rename file (olaoluwasalami, Jan 3, 2024)
1fe55b3  Merge branch 'main' into upgrade-docusarus (skzhang1, Jan 4, 2024)
3b8195c  removed backtick (olaoluwasalami, Jan 4, 2024)
7421cb2  Merge branch 'upgrade-docusarus' of https://github.com/olaoluwasalami… (olaoluwasalami, Jan 4, 2024)
b6258ae  Merge branch 'main' into upgrade-docusarus (davorrunje, Jan 5, 2024)
dcf5f82  Add support for MD files in Docusaurus (davorrunje, Jan 6, 2024)
137fbed  Add support for MD files in Docusaurus (davorrunje, Jan 6, 2024)
e3096e7  Add support for MD files in Docusaurus (davorrunje, Jan 6, 2024)
bc8ec9b  polishing (davorrunje, Jan 6, 2024)
d524d48  Merge branch 'main' into upgrade-docusarus (davorrunje, Jan 6, 2024)
17187e7  Merge branch 'main' into upgrade-docusarus (davorrunje, Jan 7, 2024)
d74eede  Merge branch 'main' into upgrade-docusarus (davorrunje, Jan 10, 2024)
afca259  cleanup (davorrunje, Jan 10, 2024)
ac3a37b  cleanup (davorrunje, Jan 10, 2024)
41592e0  cleanup (davorrunje, Jan 10, 2024)
d8bcbc5  cleanup (davorrunje, Jan 10, 2024)
22a6440  cleanup (davorrunje, Jan 10, 2024)
b7646ea  cleanup (davorrunje, Jan 10, 2024)
b1ac6d6  cleanup (davorrunje, Jan 10, 2024)
2e2cc2a  merge with main and build scripts added (davorrunje, Jan 11, 2024)
0a21f42  Merge branch 'main' into upgrade-docusarus (davorrunje, Jan 11, 2024)
651777b  Merge branch 'main' into upgrade-docusarus (victordibia, Jan 12, 2024)
8 changes: 4 additions & 4 deletions .github/workflows/deploy-website.yml
@@ -26,9 +26,9 @@ jobs:
working-directory: website
steps:
- uses: actions/checkout@v3
- uses: actions/setup-node@v3
- uses: actions/setup-node@v4
with:
node-version: 14.x
node-version: 18.x
- name: setup python
uses: actions/setup-python@v4
with:
@@ -60,9 +60,9 @@ jobs:
working-directory: website
steps:
- uses: actions/checkout@v3
- uses: actions/setup-node@v3
- uses: actions/setup-node@v4
with:
node-version: 14.x
node-version: 18.x
- name: setup python
uses: actions/setup-python@v4
with:
8 changes: 4 additions & 4 deletions autogen/agentchat/contrib/img_utils.py
@@ -27,11 +27,11 @@ def get_image_data(image_file: str, use_b64=True) -> bytes:


def llava_formatter(prompt: str, order_image_tokens: bool = False) -> Tuple[str, List[str]]:
"""
r"""
Formats the input prompt by replacing image tags and returns the new prompt along with image locations.

Parameters:
- prompt (str): The input string that may contain image tags like <img ...>.
- prompt (str): The input string that may contain image tags like `\<img ...\>`.
- order_image_tokens (bool, optional): Whether to order the image tokens with numbers.
It will be useful for GPT-4V. Defaults to False.

@@ -93,11 +93,11 @@ def _get_mime_type_from_data_uri(base64_image):


def gpt4v_formatter(prompt: str) -> List[Union[str, dict]]:
"""
r"""
Formats the input prompt by replacing image tags and returns a list of text and images.

Parameters:
- prompt (str): The input string that may contain image tags like <img ...>.
- prompt (str): The input string that may contain image tags like `\<img ...\>`.

Returns:
- List[Union[str, dict]]: A list of alternating text and image dictionary items.
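The pattern in this file recurs throughout the PR: Docusaurus 3 renders these docstrings through MDX v3, which parses an unescaped `<img ...>` as a JSX tag and fails the build, so the angle brackets are escaped and the docstrings become raw strings so Python preserves the backslashes. A minimal sketch of why the `r` prefix helps (illustrative only, not part of the diff):

```python
# Without a raw string, the backslash escapes must themselves be escaped
# for the literal "\<img\>" to survive into the rendered docs.
plain = "Image tags like \\<img ...\\> are escaped for MDX v3."

# A raw string keeps the backslashes verbatim, so the docstring can be
# written exactly as MDX expects to receive it.
raw = r"Image tags like \<img ...\> are escaped for MDX v3."

assert plain == raw  # identical text; the raw form is simply easier to read
```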
14 changes: 7 additions & 7 deletions autogen/code_utils.py
@@ -137,7 +137,7 @@ def extract_code(


def generate_code(pattern: str = CODE_BLOCK_PATTERN, **config) -> Tuple[str, float]:
"""(openai<1) Generate code.
r"""(openai\<1) Generate code.

Args:
pattern (Optional, str): The regular expression pattern for finding the code block.
@@ -162,7 +162,7 @@ def generate_code(pattern: str = CODE_BLOCK_PATTERN, **config) -> Tuple[str, float]:


def improve_function(file_name, func_name, objective, **config):
"""(openai<1) Improve the function to achieve the objective."""
r"""(openai\<1) Improve the function to achieve the objective."""
params = {**_IMPROVE_FUNCTION_CONFIG, **config}
# read the entire file into a str
with open(file_name, "r") as f:
@@ -183,7 +183,7 @@ def improve_function(file_name, func_name, objective, **config):


def improve_code(files, objective, suggest_only=True, **config):
"""(openai<1) Improve the code to achieve a given objective.
r"""(openai\<1) Improve the code to achieve a given objective.

Args:
files (list): A list of file names containing the source code.
@@ -433,7 +433,7 @@ def execute_code(


def generate_assertions(definition: str, **config) -> Tuple[str, float]:
"""(openai<1) Generate assertions for a function.
r"""(openai\<1) Generate assertions for a function.

Args:
definition (str): The function definition, including the signature and docstr.
@@ -470,7 +470,7 @@ def eval_function_completions(
timeout: Optional[float] = 3,
use_docker: Optional[bool] = True,
) -> Dict:
"""(openai<1) Select a response from a list of responses for the function completion task (using generated assertions), and/or evaluate if the task is successful using a gold test.
r"""(openai\<1) Select a response from a list of responses for the function completion task (using generated assertions), and/or evaluate if the task is successful using a gold test.

Args:
responses (list): The list of responses.
@@ -560,7 +560,7 @@ def __init__(self, assertions):
self.metrics = self.responses = None

def pass_assertions(self, context, response, **_):
"""(openai<1) Check if the response passes the assertions."""
r"""(openai\<1) Check if the response passes the assertions."""
responses = oai.Completion.extract_text(response)
metrics = eval_function_completions(responses, context["definition"], assertions=self._assertions)
self._assertions = metrics["assertions"]
@@ -575,7 +575,7 @@ def implement(
configs: Optional[List[Dict]] = None,
assertions: Optional[Union[str, Callable[[str], Tuple[str, float]]]] = generate_assertions,
) -> Tuple[str, float]:
"""(openai<1) Implement a function from a definition.
r"""(openai\<1) Implement a function from a definition.

Args:
definition (str): The function definition, including the signature and docstr.
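These `(openai\<1)` helpers operate on fenced code blocks extracted from model output. As a hypothetical illustration of the kind of pattern `extract_code` consumes (the real `CODE_BLOCK_PATTERN` in `autogen.code_utils` is more elaborate; this is a simplified stand-in):

```python
import re

FENCE = "`" * 3  # spelled this way to avoid literal fences inside the example

# Hypothetical, simplified stand-in for CODE_BLOCK_PATTERN: captures the
# language tag and the body of a fenced code block.
pattern = FENCE + r"(\w*)\n(.*?)" + FENCE

text = f"Here is code:\n{FENCE}python\nprint('hi')\n{FENCE}"
for lang, code in re.findall(pattern, text, flags=re.DOTALL):
    print(lang or "unknown", "->", code.strip())  # python -> print('hi')
```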
16 changes: 8 additions & 8 deletions autogen/math_utils.py
@@ -9,7 +9,7 @@


def solve_problem(problem: str, **config) -> str:
"""(openai<1) Solve the math problem.
r"""(openai\<1) Solve the math problem.

Args:
problem (str): The problem statement.
@@ -25,13 +25,13 @@ def solve_problem(problem: str, **config) -> str:


def remove_boxed(string: str) -> Optional[str]:
"""Source: https://github.com/hendrycks/math
Extract the text within a \\boxed{...} environment.
r"""Source: https://github.com/hendrycks/math
Extract the text within a \\boxed\{...\} environment.
Example:

> remove_boxed("\\boxed{\\frac{2}{3}}")

\\frac{2}{3}
> remove_boxed("\\boxed\{\\frac\{2\}\{3\}\}")
Output:
> \\frac\{2\}\{3\}
"""
left = "\\boxed{"
try:
@@ -44,8 +44,8 @@ def remove_boxed(string: str) -> Optional[str]:


def last_boxed_only_string(string: str) -> Optional[str]:
"""Source: https://github.com/hendrycks/math
Extract the last \\boxed{...} or \\fbox{...} element from a string.
r"""Source: https://github.com/hendrycks/math
Extract the last \\boxed\{...\} or \\fbox\{...\} element from a string.
"""
idx = string.rfind("\\boxed")
if idx < 0:
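For readers skimming the diff, the body of `remove_boxed` (truncated above) strips a leading `\boxed{` and the trailing `}`. A minimal sketch consistent with the visible lines, assuming the hendrycks/math behavior rather than quoting it exactly:

```python
from typing import Optional


def remove_boxed_sketch(string: str) -> Optional[str]:
    """Illustrative only: extract the text inside a \\boxed{...} wrapper."""
    left = "\\boxed{"
    if not (string.startswith(left) and string.endswith("}")):
        return None  # not a plain \boxed{...} expression
    return string[len(left):-1]


assert remove_boxed_sketch("\\boxed{\\frac{2}{3}}") == "\\frac{2}{3}"
```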
12 changes: 6 additions & 6 deletions autogen/oai/completion.py
@@ -40,7 +40,7 @@


class Completion(openai_Completion):
"""(openai<1) A class for OpenAI completion API.
r"""(openai\<1) A class for OpenAI completion API.

It also supports: ChatCompletion, Azure OpenAI API.
"""
@@ -126,26 +126,26 @@ class Completion(openai_Completion):

@classmethod
def set_cache(cls, seed: Optional[int] = 41, cache_path_root: Optional[str] = ".cache"):
"""Set cache path.
r"""Set cache path.

Args:
seed (int, Optional): The integer identifier for the pseudo seed.
Results corresponding to different seeds will be cached in different places.
cache_path (str, Optional): The root path for the cache.
The complete cache path will be {cache_path}/{seed}.
The complete cache path will be \{cache_path\}/\{seed\}.
"""
cls.cache_seed = seed
cls.cache_path = f"{cache_path_root}/{seed}"

@classmethod
def clear_cache(cls, seed: Optional[int] = None, cache_path_root: Optional[str] = ".cache"):
"""Clear cache.
r"""Clear cache.

Args:
seed (int, Optional): The integer identifier for the pseudo seed.
If omitted, all caches under cache_path_root will be cleared.
cache_path (str, Optional): The root path for the cache.
The complete cache path will be {cache_path}/{cache_seed}.
The complete cache path will be \{cache_path\}/\{cache_seed\}.
"""
if seed is None:
shutil.rmtree(cache_path_root, ignore_errors=True)
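Read together, `set_cache` and `clear_cache` give a per-seed disk cache under the root path. A short usage sketch (assuming the legacy `openai<1` setup this class targets):

```python
from autogen import oai

# Results for seed 41 are cached under .cache/41; a different seed or
# root writes to a separate directory.
oai.Completion.set_cache(seed=41, cache_path_root=".cache")

# Passing a seed clears only .cache/41; omitting it clears everything
# under the root.
oai.Completion.clear_cache(seed=41, cache_path_root=".cache")
```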
@@ -1189,7 +1189,7 @@ def stop_logging(cls):


class ChatCompletion(Completion):
"""(openai<1) A class for OpenAI API ChatCompletion. Share the same API as Completion."""
r"""(openai\<1) A class for OpenAI API ChatCompletion. Share the same API as Completion."""

default_search_space = Completion.default_search_space.copy()
default_search_space["model"] = tune.choice(["gpt-3.5-turbo", "gpt-4"])
12 changes: 6 additions & 6 deletions website/blog/2023-04-21-LLM-tuning-math/index.mdx
@@ -28,13 +28,13 @@ We use AutoGen to select between the following models with a target inference budget

We adapt the models using 20 examples in the train set, using the problem statement as the input and generating the solution as the output. We use the following inference parameters:

- temperature: The parameter that controls the randomness of the output text. A higher temperature means more diversity but less coherence. We search for the optimal temperature in the range of [0, 1].
- top_p: The parameter that controls the probability mass of the output tokens. Only tokens with a cumulative probability less than or equal to top-p are considered. A lower top-p means more diversity but less coherence. We search for the optimal top-p in the range of [0, 1].
- max_tokens: The maximum number of tokens that can be generated for each output. We search for the optimal max length in the range of [50, 1000].
- n: The number of responses to generate. We search for the optimal n in the range of [1, 100].
- prompt: We use the template: "{problem} Solve the problem carefully. Simplify your answer as much as possible. Put the final answer in \\boxed{{}}." where {problem} will be replaced by the math problem instance.
- temperature: The parameter that controls the randomness of the output text. A higher temperature means more diversity but less coherence. We search for the optimal temperature in the range of `[0, 1]`.
- top_p: The parameter that controls the probability mass of the output tokens. Only tokens with a cumulative probability less than or equal to top-p are considered. A lower top-p means more diversity but less coherence. We search for the optimal top-p in the range of `[0, 1]`.
- max_tokens: The maximum number of tokens that can be generated for each output. We search for the optimal max length in the range of `[50, 1000]`.
- n: The number of responses to generate. We search for the optimal n in the range of `[1, 100]`.
- prompt: We use the template: "\{problem} Solve the problem carefully. Simplify your answer as much as possible. Put the final answer in `\boxed{{}}`." where `{problem}` will be replaced by the math problem instance.

In this experiment, when n > 1, we find the answer with highest votes among all the responses and then select it as the final answer to compare with the ground truth. For example, if n = 5 and 3 of the responses contain a final answer 301 while 2 of the responses contain a final answer 159, we choose 301 as the final answer. This can help with resolving potential errors due to randomness. We use the average accuracy and average inference cost as the metric to evaluate the performance over a dataset. The inference cost of a particular instance is measured by the price per 1K tokens and the number of tokens consumed.
In this experiment, when `n > 1`, we find the answer with highest votes among all the responses and then select it as the final answer to compare with the ground truth. For example, if n = 5 and 3 of the responses contain a final answer 301 while 2 of the responses contain a final answer 159, we choose 301 as the final answer. This can help with resolving potential errors due to randomness. We use the average accuracy and average inference cost as the metric to evaluate the performance over a dataset. The inference cost of a particular instance is measured by the price per 1K tokens and the number of tokens consumed.
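The voting step can be sketched as follows, with `extract_answer` standing in as a hypothetical parser that pulls the final boxed answer out of a response:

```python
from collections import Counter
from typing import Callable, List, Optional


def majority_answer(
    responses: List[str],
    extract_answer: Callable[[str], Optional[str]],
) -> Optional[str]:
    # Parse a final answer from each response, dropping unparseable ones.
    answers = [a for a in (extract_answer(r) for r in responses) if a is not None]
    if not answers:
        return None
    # Most common answer wins: 3 votes for "301" beat 2 votes for "159".
    return Counter(answers).most_common(1)[0][0]
```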

## Experiment Results

2 changes: 1 addition & 1 deletion website/blog/2023-07-14-Local-LLMs/index.mdx
@@ -64,7 +64,7 @@ class CompletionResponseStreamChoice(BaseModel):
```


## Interact with model using `oai.Completion` (requires openai<1)
## Interact with model using `oai.Completion` (requires openai&lt;1)

Now the models can be directly accessed through openai-python library as well as `autogen.oai.Completion` and `autogen.oai.ChatCompletion`.
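A minimal call against such a locally served model might look like the sketch below; the endpoint, model name, and exact config keys are placeholders for whatever the local server exposes:

```python
from autogen import oai

config_list = [
    {
        "model": "chatglm2-6b",                  # placeholder model name
        "api_base": "http://localhost:8000/v1",  # placeholder local endpoint
        "api_type": "open_ai",
        "api_key": "NULL",                       # local servers typically ignore the key
    }
]

# Legacy (openai<1) completion call routed to the local endpoint.
response = oai.Completion.create(config_list=config_list, prompt="Hi")
print(oai.Completion.extract_text(response))
```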

2 changes: 1 addition & 1 deletion website/docs/Contribute.md
@@ -13,7 +13,7 @@ This project welcomes and encourages all forms of contributions, including but not limited to

Most contributions require you to agree to a
Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
the rights to use your contribution. For details, visit <https://cla.opensource.microsoft.com>.
the rights to use your contribution. For details, visit [https://cla.opensource.microsoft.com](https://cla.opensource.microsoft.com).

If you are new to GitHub [here](https://help.github.com/categories/collaborating-with-issues-and-pull-requests/) is a detailed help source on getting involved with development on GitHub.

6 changes: 3 additions & 3 deletions website/docs/Use-Cases/enhanced_inference.md
@@ -4,7 +4,7 @@
`autogen.Completion` is a drop-in replacement of `openai.Completion` and `openai.ChatCompletion` for enhanced LLM inference using `openai<1`.
There are a number of benefits of using `autogen` to perform inference: performance tuning, API unification, caching, error handling, multi-config inference, result filtering, templating and so on.

## Tune Inference Parameters (for openai<1)
## Tune Inference Parameters (for openai&lt;1)

Find a list of examples in this page: [Tune Inference Parameters Examples](../Examples.md#tune-inference-hyperparameters)

@@ -68,7 +68,7 @@ Users can specify the (optional) search range for each hyperparameter.
1. model. Either a constant str, or multiple choices specified by `flaml.tune.choice`.
1. prompt/messages. Prompt is either a str or a list of strs, of the prompt templates. messages is a list of dicts or a list of lists, of the message templates.
Each prompt/message template will be formatted with each data instance. For example, the prompt template can be:
"{problem} Solve the problem carefully. Simplify your answer as much as possible. Put the final answer in \\boxed{{}}."
"`{problem}` Solve the problem carefully. Simplify your answer as much as possible. Put the final answer in `\boxed{{}}."`
And `{problem}` will be replaced by the "problem" field of each data instance.
1. max_tokens, n, best_of. They can be constants, or specified by `flaml.tune.randint`, `flaml.tune.qrandint`, `flaml.tune.lograndint` or `flaml.tune.qlograndint`. By default, max_tokens is searched in [50, 1000); n is searched in [1, 100); and best_of is fixed to 1.
1. stop. It can be a str or a list of strs, or a list of lists of strs or None. Default is None.
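As a concrete illustration of these ranges, a search space along these lines could be handed to the tuner (a sketch under the stated defaults, not the library's actual internal definition):

```python
from flaml import tune

search_space = {
    "model": tune.choice(["gpt-3.5-turbo", "gpt-4"]),
    "prompt": "{problem} Solve the problem carefully. Simplify your answer "
    "as much as possible. Put the final answer in \\boxed{{}}.",
    "max_tokens": tune.lograndint(50, 1000),  # searched in [50, 1000)
    "n": tune.randint(1, 100),                # searched in [1, 100)
    "best_of": 1,                             # fixed by default
}
```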
@@ -322,7 +322,7 @@ context.append(
response = client.create(context=context, messages=messages, **config)
```

## Logging (for openai<1)
## Logging (for openai&lt;1)

When debugging or diagnosing an LLM-based system, it is often convenient to log the API calls and analyze them. `autogen.Completion` and `autogen.ChatCompletion` offer an easy way to collect the API call histories. For example, to log the chat histories, simply run:
```python
15 changes: 10 additions & 5 deletions website/package.json
@@ -25,23 +25,28 @@
"write-heading-ids": "docusaurus write-heading-ids"
},
"dependencies": {
"@docusaurus/core": "0.0.0-4193",
"@docusaurus/preset-classic": "0.0.0-4193",
"@docusaurus/core": "^3.0.1",
"@docusaurus/preset-classic": "^3.0.1",
"@easyops-cn/docusaurus-search-local": "^0.21.1",
"@mdx-js/react": "^1.6.21",
"@mdx-js/react": "^3.0.0",
"@svgr/webpack": "^5.5.0",
"antd": "^5.11.5",
"clsx": "^1.1.1",
"file-loader": "^6.2.0",
"hast-util-is-element": "1.1.0",
"joi": "17.6.0",
"minimatch": "3.0.5",
"react": "^17.0.1",
"react-dom": "^17.0.1",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"rehype-katex": "4",
"remark-math": "3",
"trim": "^0.0.3",
"url-loader": "^4.1.1"
},
"devDependencies": {
"@docusaurus/module-type-aliases": "^3.0.1",
"@docusaurus/types": "^3.0.1"
},
"browserslist": {
"production": [
">0.5%",
3 changes: 2 additions & 1 deletion website/sidebars.js
@@ -14,7 +14,8 @@
'Getting-Started',
'Installation',
{'Use Cases': [{type: 'autogenerated', dirName: 'Use-Cases'}]},
{'Examples': [{type: 'autogenerated', dirName: 'Examples'}]},
// {'Examples': [{type: 'autogenerated', dirName: 'Examples'}]},
'Examples',
'Contribute',
'Research',
],