diff --git a/autogen/oai/gemini.py b/autogen/oai/gemini.py index d3a2ec03523d..f0ffae6c0a1b 100644 --- a/autogen/oai/gemini.py +++ b/autogen/oai/gemini.py @@ -96,8 +96,8 @@ def __init__(self, **kwargs): else: self.use_vertexai = False if not self.use_vertexai: - assert ("project_id" in kwargs) or ( - "location" in kwargs + assert ("project_id" not in kwargs) and ( + "location" not in kwargs ), "Google Cloud project and compute location cannot be set when using an API Key!" def message_retrieval(self, response) -> List: @@ -125,11 +125,9 @@ def get_usage(response) -> Dict: } def create(self, params: Dict) -> ChatCompletion: - if self.api_key is None: - self.use_vertexai = True + if self.use_vertexai: self.initialize_vartexai(**params) else: - self.use_vertexai = False assert ("project_id" not in params) and ( "location" not in params ), "Google Cloud project and compute location cannot be set when using an API Key!" @@ -274,9 +272,10 @@ def oai_content_to_gemini_content(self, content: Union[str, List]) -> List: rst.append(Part(text=msg["text"])) elif msg["type"] == "image_url": if self.use_vertexai: - img = get_image_data(msg["image_url"]["url"], use_b64=False) - # img = _to_pil(b64_img) - # img_part = VertexAIPart.from_image(img) + img_url = msg["image_url"]["url"] + re.match(r"data:image/(?:png|jpeg);base64,", img_url) + img = get_image_data(img_url, use_b64=False) + # image/png works with jpeg as well img_part = VertexAIPart.from_data(img, mime_type="image/png") rst.append(img_part) else: diff --git a/test/oai/test_gemini.py b/test/oai/test_gemini.py index 86bbf9853086..4f77d288789a 100644 --- a/test/oai/test_gemini.py +++ b/test/oai/test_gemini.py @@ -34,7 +34,7 @@ def gemini_client(): # Test compute location initialization and configuration -@pytest.mark.skipif(skip, reason="Google Cloud project and compute location cannot be set when using an API Key") +@pytest.mark.skipif(skip, reason="Google GenAI dependency is not installed") def 
test_compute_location_initialization(): with pytest.raises(AssertionError): GeminiClient( diff --git a/website/docs/topics/non-openai-models/cloud-gemini.ipynb b/website/docs/topics/non-openai-models/cloud-gemini.ipynb index a794b8552e5f..da773e0d4472 100644 --- a/website/docs/topics/non-openai-models/cloud-gemini.ipynb +++ b/website/docs/topics/non-openai-models/cloud-gemini.ipynb @@ -24,11 +24,13 @@ "\n", "## Features\n", "\n", - "There's no need to handle OpenAI or Google's GenAI packages separately; AutoGen manages all of these for you. You can easily create different agents with various backend LLMs using the assistant agent. All models and agents are readily accessible at your fingertips.\n", + "There's no need to handle OpenAI or Google's GenAI packages separately; AutoGen manages all of these for you. You can easily create different agents with various backend LLMs using the assistant agent. All models and agents are readily accessible at your fingertips. \n", + " \n", "\n", "## Main Distinctions\n", "\n", - "- Currently, Gemini does not include a \"system_message\" field. However, you can incorporate this instruction into the first message of your interaction." + "- Currently, Gemini does not include a \"system_message\" field. However, you can incorporate this instruction into the first message of your interaction.\n", + "- If no API key is specified for Gemini, authentication falls back to the default Google auth mechanism for Google Cloud. Service accounts are also supported; in that case, the JSON key file has to be provided." 
] }, { @@ -57,6 +59,16 @@ " \"api_type\": \"google\"\n", " },\n", " {\n", + " \"model\": \"gemini-1.5-pro-001\",\n", + " \"api_type\": \"google\"\n", + " },\n", + " {\n", + " \"model\": \"gemini-1.5-pro\",\n", + " \"project_id\": \"your-awesome-google-cloud-project-id\",\n", + " \"location\": \"us-west1\",\n", + " \"google_application_credentials\": \"your-google-service-account-key.json\"\n", + " },\n", + " {\n", " \"model\": \"gemini-pro-vision\",\n", " \"api_key\": \"your Google's GenAI Key goes here\",\n", " \"api_type\": \"google\"\n", @@ -110,7 +122,7 @@ "config_list_gemini = autogen.config_list_from_json(\n", " \"OAI_CONFIG_LIST\",\n", " filter_dict={\n", - " \"model\": [\"gemini-pro\"],\n", + " \"model\": [\"gemini-pro\", \"gemini-1.5-pro\", \"gemini-1.5-pro-001\"],\n", " },\n", ")\n", "\n",