From f9680695b41e079eebea10e4342b87708a679761 Mon Sep 17 00:00:00 2001 From: romartin Date: Tue, 12 Aug 2025 20:32:32 +0200 Subject: [PATCH] AAP-51301: Modify ansible-chatbot-stack build to include dependencies for gemini provider --- Makefile | 2 ++ README.md | 14 ++++++++++++++ ansible-chatbot-mcp-run.yaml | 9 +++++++++ ansible-chatbot-run.yaml | 9 +++++++++ pyproject.toml | 1 + requirements.txt | 3 ++- uv.lock | 30 +++++++++++++++++++++++++++--- 7 files changed, 64 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index ecf11ab..d3a08a3 100644 --- a/Makefile +++ b/Makefile @@ -124,6 +124,7 @@ run: check-env-run --env VLLM_API_TOKEN=$(ANSIBLE_CHATBOT_VLLM_API_TOKEN) \ --env INFERENCE_MODEL=$(ANSIBLE_CHATBOT_INFERENCE_MODEL) \ --env INFERENCE_MODEL_FILTER=$(ANSIBLE_CHATBOT_INFERENCE_MODEL_FILTER) \ + --env GEMINI_API_KEY=$(GEMINI_API_KEY) \ ansible-chatbot-stack:$(ANSIBLE_CHATBOT_VERSION) run-test: @@ -162,6 +163,7 @@ run-local-db: check-env-run-local-db --env VLLM_API_TOKEN=$(ANSIBLE_CHATBOT_VLLM_API_TOKEN) \ --env INFERENCE_MODEL=$(ANSIBLE_CHATBOT_INFERENCE_MODEL) \ --env INFERENCE_MODEL_FILTER=$(ANSIBLE_CHATBOT_INFERENCE_MODEL_FILTER) \ + --env GEMINI_API_KEY=$(GEMINI_API_KEY) \ ansible-chatbot-stack:$(ANSIBLE_CHATBOT_VERSION) clean: diff --git a/README.md b/README.md index fae4b14..7e8dd9a 100644 --- a/README.md +++ b/README.md @@ -154,6 +154,20 @@ Runs basic tests against the local container. 
kubectl apply -f my-chatbot-stack-deploy.yaml ``` +## Appendix - Google Gemini + +* Please set the environment variable `GEMINI_API_KEY=` +* Example of a `v1/query` request: +```json +{ + "query": "hello", + "system_prompt": "You are a helpful assistant.", + "model": "gemini/gemini-2.5-flash", + "provider": "gemini" +} +``` + + ## Appendix - Host clean-up If you have the need for re-building images, apply the following clean-ups right before: diff --git a/ansible-chatbot-mcp-run.yaml b/ansible-chatbot-mcp-run.yaml index b8898b5..2bc9dd0 100644 --- a/ansible-chatbot-mcp-run.yaml +++ b/ansible-chatbot-mcp-run.yaml @@ -18,6 +18,10 @@ providers: max_tokens: ${env.VLLM_MAX_TOKENS:=4096} api_token: ${env.VLLM_API_TOKEN:=fake} tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: gemini + provider_type: remote::gemini + config: + api_key: ${env.GEMINI_API_KEY:=fake} - provider_id: inline_sentence-transformer provider_type: inline::sentence-transformers config: {} @@ -85,6 +89,11 @@ models: model_id: ${env.EMBEDDINGS_MODEL:=/.llama/data/distributions/ansible-chatbot/embeddings_model} provider_id: inline_sentence-transformer model_type: embedding +- metadata: {} + model_id: ${env.GEMINI_INFERENCE_MODEL:=gemini/gemini-2.5-flash} + provider_id: gemini + provider_model_id: gemini/gemini-2.5-flash + model_type: llm shields: [] vector_dbs: - metadata: {} diff --git a/ansible-chatbot-run.yaml b/ansible-chatbot-run.yaml index 5b9f8e3..7a17564 100644 --- a/ansible-chatbot-run.yaml +++ b/ansible-chatbot-run.yaml @@ -18,6 +18,10 @@ providers: max_tokens: ${env.VLLM_MAX_TOKENS:=4096} api_token: ${env.VLLM_API_TOKEN:=fake} tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: gemini + provider_type: remote::gemini + config: + api_key: ${env.GEMINI_API_KEY:=fake} - provider_id: inline_sentence-transformer provider_type: inline::sentence-transformers config: {} @@ -85,6 +89,11 @@ models: model_id:
${env.EMBEDDINGS_MODEL:=/.llama/data/distributions/ansible-chatbot/embeddings_model} provider_id: inline_sentence-transformer model_type: embedding +- metadata: {} + model_id: ${env.GEMINI_INFERENCE_MODEL:=gemini/gemini-2.5-flash} + provider_id: gemini + provider_model_id: gemini/gemini-2.5-flash + model_type: llm shields: [] vector_dbs: - metadata: {} diff --git a/pyproject.toml b/pyproject.toml index 6dcfb8a..cfca899 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "opentelemetry-exporter-otlp~=1.34.1", "sentence-transformers>=5.0.0", "sqlalchemy~=2.0.41", + "litellm~=1.75.3", ] [dependency-groups] diff --git a/requirements.txt b/requirements.txt index 1b3db66..8c0c457 100644 --- a/requirements.txt +++ b/requirements.txt @@ -37,6 +37,7 @@ joblib==1.5.1 jsonschema==4.24.0 jsonschema-specifications==2025.4.1 lightspeed-stack-providers==0.1.14 +litellm==1.75.5.post1 llama-api-client==0.1.2 llama-stack==0.2.16 llama-stack-client==0.2.16 @@ -62,7 +63,7 @@ nvidia-cusparselt-cu12==0.6.3 ; platform_machine == 'x86_64' and sys_platform == nvidia-nccl-cu12==2.26.2 ; platform_machine == 'x86_64' and sys_platform == 'linux' nvidia-nvjitlink-cu12==12.6.85 ; platform_machine == 'x86_64' and sys_platform == 'linux' nvidia-nvtx-cu12==12.6.77 ; platform_machine == 'x86_64' and sys_platform == 'linux' -openai==1.91.0 +openai==1.99.9 opentelemetry-api==1.34.1 opentelemetry-exporter-otlp==1.34.1 opentelemetry-exporter-otlp-proto-common==1.34.1 diff --git a/uv.lock b/uv.lock index 3f8281e..f7fb389 100644 --- a/uv.lock +++ b/uv.lock @@ -108,6 +108,7 @@ dependencies = [ { name = "faiss-cpu" }, { name = "fire" }, { name = "lightspeed-stack-providers" }, + { name = "litellm" }, { name = "mcp" }, { name = "numpy" }, { name = "opentelemetry-api" }, @@ -128,6 +129,7 @@ requires-dist = [ { name = "faiss-cpu", specifier = "~=1.11.0" }, { name = "fire", specifier = "~=0.7.0" }, { name = "lightspeed-stack-providers", specifier = "==0.1.14" }, + { name = 
"litellm", specifier = "~=1.75.3" }, { name = "mcp", specifier = "~=1.9.4" }, { name = "numpy", specifier = "==2.2.6" }, { name = "opentelemetry-api", specifier = "~=1.34.1" }, @@ -805,6 +807,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0d/ae/d768eca3eaebe81ec63f4490ae485899008a0412270eb1b5ef469196d464/lightspeed_stack_providers-0.1.14-py3-none-any.whl", hash = "sha256:723037d571eb3fad082e680f82e176bb4d4f0cf47e0a865237849b3ef4c1ba2a", size = 25010, upload-time = "2025-08-13T09:07:54.264Z" }, ] +[[package]] +name = "litellm" +version = "1.75.5.post1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohttp" }, + { name = "click" }, + { name = "httpx" }, + { name = "importlib-metadata" }, + { name = "jinja2" }, + { name = "jsonschema" }, + { name = "openai" }, + { name = "pydantic" }, + { name = "python-dotenv" }, + { name = "tiktoken" }, + { name = "tokenizers" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/10/97/6091a020895102a20f1da204ebe68c1293123555476b38e749f95ba5981c/litellm-1.75.5.post1.tar.gz", hash = "sha256:e40a0e4b25032755dc5df7f02742abe9e3b8836236363f605f3bdd363cb5a0d0", size = 10127846, upload-time = "2025-08-10T16:30:23.788Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/76/780f68a3b26227136a5147c76860aacedcae9bf1b7afc1c991ec9cad11bc/litellm-1.75.5.post1-py3-none-any.whl", hash = "sha256:1c72809a9c8f6e132ad06eb7e628f674c775b0ce6bfb58cbd37e8b585d929cb7", size = 8895997, upload-time = "2025-08-10T16:30:21.325Z" }, +] + [[package]] name = "llama-api-client" version = "0.1.2" @@ -1227,7 +1251,7 @@ wheels = [ [[package]] name = "openai" -version = "1.91.0" +version = "1.99.9" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1239,9 +1263,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/0f/e2/a22f2973b729eff3f1f429017bdf717930c5de0fbf9e14017bae330e4e7a/openai-1.91.0.tar.gz", hash = "sha256:d6b07730d2f7c6745d0991997c16f85cddfc90ddcde8d569c862c30716b9fc90", size = 472529, upload-time = "2025-06-23T18:27:10.961Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8a/d2/ef89c6f3f36b13b06e271d3cc984ddd2f62508a0972c1cbcc8485a6644ff/openai-1.99.9.tar.gz", hash = "sha256:f2082d155b1ad22e83247c3de3958eb4255b20ccf4a1de2e6681b6957b554e92", size = 506992, upload-time = "2025-08-12T02:31:10.054Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7a/d2/f99bdd6fc737d6b3cf0df895508d621fc9a386b375a1230ee81d46c5436e/openai-1.91.0-py3-none-any.whl", hash = "sha256:207f87aa3bc49365e014fac2f7e291b99929f4fe126c4654143440e0ad446a5f", size = 735837, upload-time = "2025-06-23T18:27:08.913Z" }, + { url = "https://files.pythonhosted.org/packages/e8/fb/df274ca10698ee77b07bff952f302ea627cc12dac6b85289485dd77db6de/openai-1.99.9-py3-none-any.whl", hash = "sha256:9dbcdb425553bae1ac5d947147bebbd630d91bbfc7788394d4c4f3a35682ab3a", size = 786816, upload-time = "2025-08-12T02:31:08.34Z" }, ] [[package]]