
Commit 4820a25

Merge branch 'main' into fix-article-markdown
2 parents 830daf1 + 9069eb9

File tree

6 files changed, +360 -345 lines changed


autogen/agentchat/contrib/agent_optimizer.py

+16 -13
@@ -1,6 +1,6 @@
 import copy
 import json
-from typing import Dict, List, Optional
+from typing import Dict, List, Literal, Optional, Union

 import autogen
 from autogen.code_utils import execute_code
@@ -172,16 +172,16 @@ class AgentOptimizer:
     def __init__(
         self,
         max_actions_per_step: int,
-        config_file_or_env: Optional[str] = "OAI_CONFIG_LIST",
-        config_file_location: Optional[str] = "",
+        llm_config: dict,
         optimizer_model: Optional[str] = "gpt-4-1106-preview",
     ):
         """
         (These APIs are experimental and may change in the future.)
         Args:
             max_actions_per_step (int): the maximum number of actions that the optimizer can take in one step.
-            config_file_or_env: path or environment of the OpenAI api configs.
-            config_file_location: the location of the OpenAI config file.
+            llm_config (dict): llm inference configuration.
+                Please refer to [OpenAIWrapper.create](/docs/reference/oai/client#create) for available options.
+                When using OpenAI or Azure OpenAI endpoints, please specify a non-empty 'model' either in `llm_config` or in each config of 'config_list' in `llm_config`.
             optimizer_model: the model used for the optimizer.
         """
         self.max_actions_per_step = max_actions_per_step
@@ -199,14 +199,17 @@ def __init__(
         self._failure_functions_performance = []
         self._best_performance = -1

-        config_list = autogen.config_list_from_json(
-            config_file_or_env,
-            file_location=config_file_location,
-            filter_dict={"model": [self.optimizer_model]},
+        assert isinstance(llm_config, dict), "llm_config must be a dict"
+        llm_config = copy.deepcopy(llm_config)
+        self.llm_config = llm_config
+        if self.llm_config in [{}, {"config_list": []}, {"config_list": [{"model": ""}]}]:
+            raise ValueError(
+                "When using OpenAI or Azure OpenAI endpoints, specify a non-empty 'model' either in 'llm_config' or in each config of 'config_list'."
+            )
+        self.llm_config["config_list"] = autogen.filter_config(
+            llm_config["config_list"], {"model": [self.optimizer_model]}
         )
-        if len(config_list) == 0:
-            raise RuntimeError("No valid openai config found in the config file or environment variable.")
-        self._client = autogen.OpenAIWrapper(config_list=config_list)
+        self._client = autogen.OpenAIWrapper(**self.llm_config)

     def record_one_conversation(self, conversation_history: List[Dict], is_satisfied: bool = None):
         """
@@ -266,7 +269,7 @@ def step(self):
             actions_num=action_index,
             best_functions=best_functions,
             incumbent_functions=incumbent_functions,
-            accumerated_experience=failure_experience_prompt,
+            accumulated_experience=failure_experience_prompt,
             statistic_informations=statistic_prompt,
         )
         messages = [{"role": "user", "content": prompt}]
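With this change, AgentOptimizer is constructed from an llm_config dict rather than a config file, and the supplied config_list is narrowed to the optimizer model via autogen.filter_config before the OpenAIWrapper client is built. A minimal sketch of the new call pattern (the API keys and environment variable below are placeholders, not part of the commit):

import os

from autogen.agentchat.contrib.agent_optimizer import AgentOptimizer

# Placeholder config: any entry whose "model" differs from optimizer_model is
# filtered out inside __init__ by autogen.filter_config(...).
llm_config = {
    "config_list": [
        {"model": "gpt-4-1106-preview", "api_key": os.environ["OPENAI_API_KEY"]},
        {"model": "gpt-3.5-turbo", "api_key": os.environ["OPENAI_API_KEY"]},
    ]
}
optimizer = AgentOptimizer(
    max_actions_per_step=3,
    llm_config=llm_config,
    optimizer_model="gpt-4-1106-preview",
)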

autogen/agentchat/contrib/capabilities/transforms.py

+20 -12
@@ -85,7 +85,8 @@ class MessageTokenLimiter:
     2. Individual messages are truncated based on max_tokens_per_message. For multimodal messages containing both text
     and other types of content, only the text content is truncated.
     3. The overall conversation history is truncated based on the max_tokens limit. Once the accumulated token count
-    exceeds this limit, the current message being processed as well as any remaining messages are discarded.
+    exceeds this limit, the current message being processed get truncated to meet the total token count and any
+    remaining messages get discarded.
     4. The truncated conversation history is reconstructed by prepending the messages to a new list to preserve the
     original message order.
     """
@@ -128,13 +129,20 @@ def apply_transform(self, messages: List[Dict]) -> List[Dict]:
         total_tokens = sum(_count_tokens(msg["content"]) for msg in temp_messages)

         for msg in reversed(temp_messages):
-            msg["content"] = self._truncate_str_to_tokens(msg["content"])
-            msg_tokens = _count_tokens(msg["content"])
+            expected_tokens_remained = self._max_tokens - processed_messages_tokens - self._max_tokens_per_message

-            # If adding this message would exceed the token limit, discard it and all remaining messages
-            if processed_messages_tokens + msg_tokens > self._max_tokens:
+            # If adding this message would exceed the token limit, truncate the last message to meet the total token
+            # limit and discard all remaining messages
+            if expected_tokens_remained < 0:
+                msg["content"] = self._truncate_str_to_tokens(
+                    msg["content"], self._max_tokens - processed_messages_tokens
+                )
+                processed_messages.insert(0, msg)
                 break

+            msg["content"] = self._truncate_str_to_tokens(msg["content"], self._max_tokens_per_message)
+            msg_tokens = _count_tokens(msg["content"])
+
             # prepend the message to the list to preserve order
             processed_messages_tokens += msg_tokens
             processed_messages.insert(0, msg)
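The key shift in apply_transform is that _truncate_str_to_tokens now receives an explicit token budget: the per-message cap for messages that still fit, or the remaining overall budget for the message that crosses max_tokens. A standalone sketch of that budgeted truncation using tiktoken directly (the helper name and default model here are illustrative, not part of the library):

import tiktoken

def truncate_to_budget(text: str, n_tokens: int, model: str = "gpt-3.5-turbo") -> str:
    """Keep only the first n_tokens tokens of text, mirroring the patched _truncate_tokens."""
    encoding = tiktoken.encoding_for_model(model)  # tokenizer matching the model
    return encoding.decode(encoding.encode(text)[:n_tokens])

# A message that fits uses the per-message cap; the message that crosses the
# overall limit is cut to whatever budget remains instead of being dropped.
print(truncate_to_budget("the quick brown fox jumps over the lazy dog", 4))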
@@ -149,30 +157,30 @@ def apply_transform(self, messages: List[Dict]) -> List[Dict]:

         return processed_messages

-    def _truncate_str_to_tokens(self, contents: Union[str, List]) -> Union[str, List]:
+    def _truncate_str_to_tokens(self, contents: Union[str, List], n_tokens: int) -> Union[str, List]:
         if isinstance(contents, str):
-            return self._truncate_tokens(contents)
+            return self._truncate_tokens(contents, n_tokens)
         elif isinstance(contents, list):
-            return self._truncate_multimodal_text(contents)
+            return self._truncate_multimodal_text(contents, n_tokens)
         else:
             raise ValueError(f"Contents must be a string or a list of dictionaries. Received type: {type(contents)}")

-    def _truncate_multimodal_text(self, contents: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    def _truncate_multimodal_text(self, contents: List[Dict[str, Any]], n_tokens: int) -> List[Dict[str, Any]]:
         """Truncates text content within a list of multimodal elements, preserving the overall structure."""
         tmp_contents = []
         for content in contents:
             if content["type"] == "text":
-                truncated_text = self._truncate_tokens(content["text"])
+                truncated_text = self._truncate_tokens(content["text"], n_tokens)
                 tmp_contents.append({"type": "text", "text": truncated_text})
             else:
                 tmp_contents.append(content)
         return tmp_contents

-    def _truncate_tokens(self, text: str) -> str:
+    def _truncate_tokens(self, text: str, n_tokens: int) -> str:
         encoding = tiktoken.encoding_for_model(self._model)  # Get the appropriate tokenizer

         encoded_tokens = encoding.encode(text)
-        truncated_tokens = encoded_tokens[: self._max_tokens_per_message]
+        truncated_tokens = encoded_tokens[:n_tokens]
         truncated_text = encoding.decode(truncated_tokens)  # Decode back to text

         return truncated_text
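Taken together, the transforms.py change means the message that crosses the overall max_tokens budget is truncated to fit rather than dropped, and only the older messages beyond it are discarded. A rough usage sketch (the constructor keyword names max_tokens and max_tokens_per_message are inferred from the attributes used above, so treat them as assumptions):

from autogen.agentchat.contrib.capabilities.transforms import MessageTokenLimiter

limiter = MessageTokenLimiter(max_tokens=60, max_tokens_per_message=40)
messages = [
    {"role": "user", "content": "a long first message " * 50},
    {"role": "assistant", "content": "an equally long reply " * 50},
]
# Most recent messages are kept; the one that crosses the 60-token total is
# truncated to the remaining budget instead of being discarded outright.
truncated = limiter.apply_transform(messages)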

notebook/agentchat_agentoptimizer.ipynb

+14 -9
@@ -41,6 +41,7 @@
    "source": [
     "import copy\n",
     "import json\n",
+    "import os\n",
     "from typing import Any, Callable, Dict, List, Optional, Tuple, Union\n",
     "\n",
     "from openai import BadRequestError\n",
@@ -299,16 +300,22 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "config_list = config_list_from_json(env_or_file=\"OAI_CONFIG_LIST\")\n",
+    "llm_config = {\n",
+    "    \"config_list\": [\n",
+    "        {\n",
+    "            \"model\": \"gpt-4-1106-preview\",\n",
+    "            \"api_type\": \"azure\",\n",
+    "            \"api_key\": os.environ[\"AZURE_OPENAI_API_KEY\"],\n",
+    "            \"base_url\": \"https://ENDPOINT.openai.azure.com/\",\n",
+    "            \"api_version\": \"2023-07-01-preview\",\n",
+    "        }\n",
+    "    ]\n",
+    "}\n",
    "\n",
    "assistant = autogen.AssistantAgent(\n",
    "    name=\"assistant\",\n",
    "    system_message=\"You are a helpful assistant.\",\n",
-    "    llm_config={\n",
-    "        \"timeout\": 600,\n",
-    "        \"seed\": 42,\n",
-    "        \"config_list\": config_list,\n",
-    "    },\n",
+    "    llm_config=llm_config,\n",
    ")\n",
    "user_proxy = MathUserProxyAgent(\n",
    "    name=\"mathproxyagent\",\n",
@@ -361,9 +368,7 @@
    "source": [
     "EPOCH = 10\n",
     "optimizer_model = \"gpt-4-1106-preview\"\n",
-    "optimizer = AgentOptimizer(\n",
-    "    max_actions_per_step=3, config_file_or_env=\"OAI_CONFIG_LIST\", optimizer_model=optimizer_model\n",
-    ")\n",
+    "optimizer = AgentOptimizer(max_actions_per_step=3, llm_config=llm_config, optimizer_model=optimizer_model)\n",
    "for i in range(EPOCH):\n",
    "    for index, query in enumerate(train_data):\n",
    "        is_correct = user_proxy.initiate_chat(assistant, answer=query[\"answer\"], problem=query[\"question\"])\n",

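The notebook now builds llm_config inline for an Azure OpenAI endpoint (with ENDPOINT left as a placeholder). For a plain OpenAI endpoint, the equivalent entry would look roughly like the sketch below; the field names follow autogen's config_list convention, and the environment variable is an assumption:

import os

# Hypothetical non-Azure equivalent of the notebook's llm_config (sketch only)
llm_config = {
    "config_list": [
        {
            "model": "gpt-4-1106-preview",
            "api_key": os.environ["OPENAI_API_KEY"],  # assumed env var
        }
    ]
}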