UPDATE - added working config_list_from_dotenv() with passing tests, and updated notebook
Ward authored and Ward committed Oct 4, 2023
1 parent b1f9f01 commit 3b0adf6
Showing 3 changed files with 137 additions and 46 deletions.
36 changes: 22 additions & 14 deletions autogen/oai/openai_utils.py
@@ -315,24 +315,30 @@ def config_list_from_dotenv(
"""
if dotenv_file_path:
dotenv_path = Path(dotenv_file_path)
if not dotenv_path.exists():
raise FileNotFoundError(f"The specified .env file {dotenv_file_path} does not exist.")
load_dotenv(dotenv_path)
if dotenv_path.exists():
load_dotenv(dotenv_path)
else:
logging.warning(f"The specified .env file {dotenv_path} does not exist.")
else:
dotenv_path = find_dotenv()
if not dotenv_path:
logging.warning("No .env file found. Loading configurations from environment variables.")
else:
load_dotenv(dotenv_path)
load_dotenv(dotenv_path)

if not model_api_key_map:
# Default key map
model_api_key_map = {
"gpt-4": "OPENAI_API_KEY",
"gpt-3.5-turbo": "OPENAI_API_KEY",
}
# Ensure the model_api_key_map is not None to prevent TypeErrors during key assignment.
model_api_key_map = model_api_key_map or {}

# Ensure default models are always considered
default_models = ["gpt-4", "gpt-3.5-turbo"]

for model in default_models:
# Only assign default API key if the model is not present in the map.
# If model is present but set to invalid/empty, do not overwrite.
if model not in model_api_key_map:
model_api_key_map[model] = "OPENAI_API_KEY"

env_var = []
# Loop over the models and create configuration dictionaries
for model, config in model_api_key_map.items():
if isinstance(config, str):
api_key_env_var = config
@@ -342,18 +348,20 @@ def config_list_from_dotenv(
config_without_key_var = {k: v for k, v in config.items() if k != "api_key_env_var"}
config_dict = get_config(api_key=api_key, **config_without_key_var)
else:
raise TypeError(f"Unsupported type {type(config)} for model {model} configuration")
logging.warning(f"Unsupported type {type(config)} for model {model} configuration")

if not config_dict["api_key"] or config_dict["api_key"].strip() == "":
logging.warning("API key not found or empty. Please ensure path to .env file is correct.")
logging.warning(
f"API key not found or empty for model {model}. Please ensure path to .env file is correct."
)
continue # Skip this configuration and continue with the next

# Add model to the configuration and append to the list
config_dict["model"] = model
env_var.append(config_dict)

with tempfile.NamedTemporaryFile(mode="w+", delete=True) as temp:
env_var_str = json.dumps(env_var)
logging.info(f"JSON String: {env_var_str}")
temp.write(env_var_str)
temp.flush()

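For orientation, here is a minimal usage sketch of the behavior this hunk introduces (a sketch under the assumption that a `.env` file defining `OPENAI_API_KEY` exists; not part of the diff):

```python
import autogen

# With no model_api_key_map supplied, gpt-4 and gpt-3.5-turbo now default to
# OPENAI_API_KEY, and a missing or empty key only logs a warning and skips
# that model instead of raising.
config_list = autogen.config_list_from_dotenv(
    dotenv_file_path=".env",  # a nonexistent path now logs a warning, not FileNotFoundError
    filter_dict={"model": {"gpt-4", "gpt-3.5-turbo"}},
)
print(config_list)
```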
81 changes: 62 additions & 19 deletions notebook/oai_openai_utils.ipynb
@@ -9,21 +9,30 @@
"Managing API configurations can be tricky, especially when dealing with multiple models and API versions. The provided utility functions assist users in managing these configurations effectively. Ensure your API keys and other sensitive data are stored securely. For local development, you might store keys in `.txt` or `.env` files or environment variables. Never expose your API keys publicly. If you insist on having your key files stored locally on your repo (you shouldn't), make sure the key file path is added to the `.gitignore` file.\n",
"\n",
"#### Steps:\n",
"1. Obtain API keys from OpenAI and optionally from Azure OpenAI.\n",
"1. Obtain API keys from OpenAI and optionally from Azure OpenAI (or other provider).\n",
"2. Store them securely using either:\n",
" - Environment Variables: `export OPENAI_API_KEY='your-key'` in your shell.\n",
" - Text File: Save the key in a `key_openai.txt` file.\n",
" - Env File: Save the key to a `.env` file eg: `OPENAI_API_KEY=sk-********************`"
" - Env File: Save the key to a `.env` file eg: `OPENAI_API_KEY=sk-********************`\n",
"\n",
"---\n",
"\n",
"**TL;DR:** <br>\n",
"There are many ways to generate a `config_list` depending on your use case:\n",
"\n",
"- `get_config_list`: Generates configurations for API calls, primarily from provided API keys.\n",
"- `config_list_openai_aoai`: Constructs a list of configurations using both Azure OpenAI and OpenAI endpoints, sourcing API keys from environment variables or local files.\n",
"- `config_list_from_json`: Loads configurations from a JSON structure, either from an environment variable or a local JSON file, with the flexibility of filtering configurations based on given criteria.\n",
"- `config_list_from_models`: Creates configurations based on a provided list of models, useful when targeting specific models without manually specifying each configuration.\n",
"- `config_list_from_dotenv`: Constructs a configuration list from a `.env` file, offering a consolidated way to manage multiple API configurations and keys from a single file."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### `config_list`\n",
"\n",
"When instantiating an assistant, such as the example below, you see that it is being passed a config list. This is used to tell the assistant \n",
"\n",
"#### What is a `config_list`?\n",
"When instantiating an assistant, such as the example below, you see that it is being passed a `config_list`. This is used to tell the `AssistantAgent` which models or configurations it has access to:\n",
"```python\n",
"\n",
"assistant = AssistantAgent(\n",
@@ -145,15 +154,16 @@
"Your JSON struction should look something like this:\n",
"\n",
"```json\n",
"# OAI_CONFIG_LIST file\n",
"# OAI_CONFIG_LIST file example\n",
"[\n",
" {\n",
" \"model\": \"gpt-4\",\n",
" \"api_key\": \"YOUR_OPENAI_API_KEY\"\n",
" },\n",
" {\n",
" \"model\": \"gpt-3.5-turbo\",\n",
" \"api_key\": \"YOUR_OPENAI_API_KEY\"\n",
" \"api_key\": \"YOUR_OPENAI_API_KEY\",\n",
" \"api_version\": \"2023-03-01-preview\"\n",
" }\n",
"]\n",
"\n",
@@ -181,7 +191,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"#### `filter_dict`\n",
"#### What is `filter_dict`?\n",
"\n",
"The z parameter in `autogen.config_list_from_json` function is used to selectively filter the configurations loaded from the environment variable or JSON file based on specified criteria. It allows you to define criteria to select only those configurations that match the defined conditions.\n",
"\n",
@@ -213,7 +223,7 @@
")\n",
"\n",
"# Assistant using GPT 3.5 Turbo\n",
"assistant_one = AssistantAgent(\n",
"assistant_one = autogen.AssistantAgent(\n",
" name=\"3.5-assistant\",\n",
" llm_config={\n",
" \"request_timeout\": 600,\n",
@@ -224,7 +234,7 @@
")\n",
"\n",
"# Assistant using GPT 4\n",
"assistant_two = AssistantAgent(\n",
"assistant_two = autogen.AssistantAgent(\n",
" name=\"4-assistant\",\n",
" llm_config={\n",
" \"request_timeout\": 600,\n",
@@ -331,7 +341,8 @@
{
"data": {
"text/plain": [
"[{'api_key': 'sk-*********************', 'model': 'gpt-4'}]"
"[{'api_key': 'sk-*********************', 'model': 'gpt-4'},\n",
" {'api_key': 'sk-*********************', 'model': 'gpt-3.5-turbo'}]"
]
},
"execution_count": 1,
@@ -344,9 +355,6 @@
"\n",
"config_list = autogen.config_list_from_dotenv(\n",
" dotenv_file_path='.env', # If None the function will try find in the working directory\n",
" model_api_key_map={ # String or dict accepted\n",
" \"gpt-4\": \"OPENAI_API_KEY\",\n",
" },\n",
" filter_dict={\n",
" \"model\": {\n",
" \"gpt-4\",\n",
@@ -366,8 +374,8 @@
{
"data": {
"text/plain": [
"[{'api_key': 'sk-*********************', 'model': 'gpt-4'},\n",
" {'api_key': '**************************', 'model': 'vicuna'}]"
"[{'api_key': '1234567890234567890', 'model': 'gpt-4'},\n",
" {'api_key': 'sk-*********************', 'model': 'gpt-3.5-turbo'}]"
]
},
"execution_count": 2,
@@ -376,7 +384,42 @@
}
],
"source": [
"# gpt-3.5-turbo will default to OPENAI_API_KEY\n",
"config_list = autogen.config_list_from_dotenv(\n",
" dotenv_file_path='.env', # If None the function will try find in the working directory\n",
" model_api_key_map={\n",
" \"gpt-4\": \"ANOTHER_API_KEY\", # String or dict accepted\n",
" },\n",
" filter_dict={\n",
" \"model\": {\n",
" \"gpt-4\",\n",
" \"gpt-3.5-turbo\",\n",
" }\n",
" }\n",
")\n",
"\n",
"config_list"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'api_key': 'sk-*********************', 'model': 'gpt-4'},\n",
" {'api_key': '**************************', 'model': 'vicuna'}]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# example using different environment variable names\n",
"config_list = autogen.config_list_from_dotenv(\n",
" dotenv_file_path='.env',\n",
" model_api_key_map={\n",
@@ -403,7 +446,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"outputs": [
{
@@ -417,7 +460,7 @@
" 'model': 'gpt-3.5-turbo'}]"
]
},
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
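To complement the notebook cells above, a short sketch of the dict form that `model_api_key_map` also accepts (the `api_key_env_var` key comes from the implementation hunk above; the `api_version` value is purely illustrative, mirroring the OAI_CONFIG_LIST example):

```python
import autogen

# Each dict entry names the environment variable holding the key via
# "api_key_env_var"; any remaining keys (api_version here, illustrative only)
# are passed through to the resulting config entry.
config_list = autogen.config_list_from_dotenv(
    dotenv_file_path=".env",
    model_api_key_map={
        "gpt-4": {
            "api_key_env_var": "ANOTHER_API_KEY",
            "api_version": "2023-03-01-preview",
        },
    },
    filter_dict={"model": {"gpt-4"}},
)
```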
66 changes: 53 additions & 13 deletions test/oai/test_utils.py
@@ -81,38 +81,78 @@ def test_config_list_from_dotenv(mock_os_environ, caplog):
temp.write("\n".join([f"{k}={v}" for k, v in ENV_VARS.items()]))
temp.flush()

config_list = autogen.config_list_from_dotenv(
dotenv_file_path=temp.name, model_api_key_map=MODEL_API_KEY_MAP, filter_dict=FILTER_DICT
)
# Use the updated config_list_from_dotenv function
config_list = autogen.config_list_from_dotenv(dotenv_file_path=temp.name)

# Ensure configurations are loaded and API keys match expected values
assert config_list, "Config list is empty"
assert config_list, "Config list is empty with default API keys"

# Check that configurations only include models specified in the filter
for config in config_list:
api_key_info = MODEL_API_KEY_MAP[config["model"]]
api_key_var_name = api_key_info if isinstance(api_key_info, str) else api_key_info["api_key_env_var"]
assert config["api_key"] == ENV_VARS[api_key_var_name], "API Key mismatch in valid case"
assert config["model"] in FILTER_DICT["model"], f"Model {config['model']} not in filter"

# Check the default API key for gpt-4 and gpt-3.5-turbo when model_api_key_map is None
config_list = autogen.config_list_from_dotenv(dotenv_file_path=temp.name, model_api_key_map=None)

expected_api_key = os.getenv("OPENAI_API_KEY")
assert any(
config["model"] == "gpt-4" and config["api_key"] == expected_api_key for config in config_list
), "Default gpt-4 configuration not found or incorrect"
assert any(
config["model"] == "gpt-3.5-turbo" and config["api_key"] == expected_api_key for config in config_list
), "Default gpt-3.5-turbo configuration not found or incorrect"

# Test with missing dotenv file
with pytest.raises(FileNotFoundError, match=r"The specified \.env file .* does not exist\."):
autogen.config_list_from_dotenv(dotenv_file_path="non_existent_path")
with caplog.at_level(logging.WARNING):
config_list = autogen.config_list_from_dotenv(dotenv_file_path="non_existent_path")
assert "The specified .env file non_existent_path does not exist." in caplog.text

# Test with invalid API key
ENV_VARS["ANOTHER_API_KEY"] = "" # Removing ANOTHER_API_KEY value

with caplog.at_level(logging.WARNING):
result = autogen.config_list_from_dotenv(model_api_key_map=MODEL_API_KEY_MAP)
config_list = autogen.config_list_from_dotenv()
assert "No .env file found. Loading configurations from environment variables." in caplog.text
# The function does not return an empty list if at least one configuration is loaded successfully
assert result != [], "Config list is empty"
assert config_list != [], "Config list is empty"

# Test with no configurations loaded
invalid_model_api_key_map = {
"gpt-4": "INVALID_API_KEY", # Simulate an environment var name that doesn't exist
}
with caplog.at_level(logging.ERROR):
# Mocking `config_list_from_json` to return an empty list and raise an exception when called
with mock.patch("autogen.config_list_from_json", return_value=[], side_effect=Exception("Mock called")):
# Call the function with the invalid map
config_list = autogen.config_list_from_dotenv(
model_api_key_map=invalid_model_api_key_map,
filter_dict={
"model": {
"gpt-4",
}
},
)

# Assert that the configuration list is empty
assert not config_list, "Expected no configurations to be loaded"

# test for mixed validity in the keymap
invalid_model_api_key_map = {
"gpt-4": "INVALID_API_KEY",
"gpt-3.5-turbo": "ANOTHER_API_KEY", # valid according to the example configs
}

with caplog.at_level(logging.WARNING):
# Call the function with the mixed validity map
config_list = autogen.config_list_from_dotenv(model_api_key_map=invalid_model_api_key_map)
assert "No configurations loaded." in caplog.text
assert not config_list
assert config_list, "Expected configurations to be loaded"
assert any(
config["model"] == "gpt-3.5-turbo" for config in config_list
), "gpt-3.5-turbo configuration not found"
assert all(
config["model"] != "gpt-4" for config in config_list
), "gpt-4 configuration found, but was not expected"
assert "API key not found or empty for model gpt-4" in caplog.text


if __name__ == "__main__":
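As a standalone illustration of the warning-based behavior these tests assert (a sketch, not part of the diff; the message text follows the implementation hunk above):

```python
import logging

import autogen


def test_missing_dotenv_logs_warning(caplog):
    # A nonexistent .env path should log a warning and fall back to
    # environment variables instead of raising FileNotFoundError.
    with caplog.at_level(logging.WARNING):
        autogen.config_list_from_dotenv(dotenv_file_path="non_existent_path")
    assert "does not exist" in caplog.text
```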
