From f9680695b41e079eebea10e4342b87708a679761 Mon Sep 17 00:00:00 2001 From: romartin Date: Tue, 12 Aug 2025 20:32:32 +0200 Subject: [PATCH] AAP-51301: Modify ansible-chatbot-stack build to include dependencies for gemini provider --- Makefile | 2 ++ README.md | 14 ++++++++++++++ ansible-chatbot-mcp-run.yaml | 9 +++++++++ ansible-chatbot-run.yaml | 9 +++++++++ pyproject.toml | 1 + requirements.txt | 3 ++- uv.lock | 30 +++++++++++++++++++++++++++--- 7 files changed, 64 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index ecf11ab..d3a08a3 100644 --- a/Makefile +++ b/Makefile @@ -124,6 +124,7 @@ run: check-env-run --env VLLM_API_TOKEN=$(ANSIBLE_CHATBOT_VLLM_API_TOKEN) \ --env INFERENCE_MODEL=$(ANSIBLE_CHATBOT_INFERENCE_MODEL) \ --env INFERENCE_MODEL_FILTER=$(ANSIBLE_CHATBOT_INFERENCE_MODEL_FILTER) \ + --env GEMINI_API_KEY=$(GEMINI_API_KEY) \ ansible-chatbot-stack:$(ANSIBLE_CHATBOT_VERSION) run-test: @@ -162,6 +163,7 @@ run-local-db: check-env-run-local-db --env VLLM_API_TOKEN=$(ANSIBLE_CHATBOT_VLLM_API_TOKEN) \ --env INFERENCE_MODEL=$(ANSIBLE_CHATBOT_INFERENCE_MODEL) \ --env INFERENCE_MODEL_FILTER=$(ANSIBLE_CHATBOT_INFERENCE_MODEL_FILTER) \ + --env GEMINI_API_KEY=$(GEMINI_API_KEY) \ ansible-chatbot-stack:$(ANSIBLE_CHATBOT_VERSION) clean: diff --git a/README.md b/README.md index fae4b14..7e8dd9a 100644 --- a/README.md +++ b/README.md @@ -154,6 +154,20 @@ Runs basic tests against the local container. 
kubectl apply -f my-chatbot-stack-deploy.yaml ``` +## Appendix - Google Gemini + +* Please set the environment variable `GEMINI_API_KEY=` +* Example of a `v1/query` request: +```json +{ + "query": "hello", + "system_prompt": "You are a helpful assistant.", + "model": "gemini/gemini-2.5-flash", + "provider": "gemini" +} +``` + + ## Appendix - Host clean-up If you have the need for re-building images, apply the following clean-ups right before: diff --git a/ansible-chatbot-mcp-run.yaml b/ansible-chatbot-mcp-run.yaml index b8898b5..2bc9dd0 100644 --- a/ansible-chatbot-mcp-run.yaml +++ b/ansible-chatbot-mcp-run.yaml @@ -18,6 +18,10 @@ providers: max_tokens: ${env.VLLM_MAX_TOKENS:=4096} api_token: ${env.VLLM_API_TOKEN:=fake} tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: gemini + provider_type: remote::gemini + config: + api_key: ${env.GEMINI_API_KEY:=fake} - provider_id: inline_sentence-transformer provider_type: inline::sentence-transformers config: {} @@ -85,6 +89,11 @@ models: model_id: ${env.EMBEDDINGS_MODEL:=/.llama/data/distributions/ansible-chatbot/embeddings_model} provider_id: inline_sentence-transformer model_type: embedding +- metadata: {} + model_id: ${env.GEMINI_INFERENCE_MODEL:=gemini/gemini-2.5-flash} + provider_id: gemini + provider_model_id: gemini/gemini-2.5-flash + model_type: llm shields: [] vector_dbs: - metadata: {} diff --git a/ansible-chatbot-run.yaml b/ansible-chatbot-run.yaml index 5b9f8e3..7a17564 100644 --- a/ansible-chatbot-run.yaml +++ b/ansible-chatbot-run.yaml @@ -18,6 +18,10 @@ providers: max_tokens: ${env.VLLM_MAX_TOKENS:=4096} api_token: ${env.VLLM_API_TOKEN:=fake} tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: gemini + provider_type: remote::gemini + config: + api_key: ${env.GEMINI_API_KEY:=fake} - provider_id: inline_sentence-transformer provider_type: inline::sentence-transformers config: {} @@ -85,6 +89,11 @@ models: model_id:
${env.EMBEDDINGS_MODEL:=/.llama/data/distributions/ansible-chatbot/embeddings_model} provider_id: inline_sentence-transformer model_type: embedding +- metadata: {} + model_id: ${env.GEMINI_INFERENCE_MODEL:=gemini/gemini-2.5-flash} + provider_id: gemini + provider_model_id: gemini/gemini-2.5-flash + model_type: llm shields: [] vector_dbs: - metadata: {} diff --git a/pyproject.toml b/pyproject.toml index 6dcfb8a..cfca899 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "opentelemetry-exporter-otlp~=1.34.1", "sentence-transformers>=5.0.0", "sqlalchemy~=2.0.41", + "litellm~=1.75.3", ] [dependency-groups] diff --git a/requirements.txt b/requirements.txt index 1b3db66..8c0c457 100644 --- a/requirements.txt +++ b/requirements.txt @@ -37,6 +37,7 @@ joblib==1.5.1 jsonschema==4.24.0 jsonschema-specifications==2025.4.1 lightspeed-stack-providers==0.1.14 +litellm==1.75.5.post1 llama-api-client==0.1.2 llama-stack==0.2.16 llama-stack-client==0.2.16 @@ -62,7 +63,7 @@ nvidia-cusparselt-cu12==0.6.3 ; platform_machine == 'x86_64' and sys_platform == nvidia-nccl-cu12==2.26.2 ; platform_machine == 'x86_64' and sys_platform == 'linux' nvidia-nvjitlink-cu12==12.6.85 ; platform_machine == 'x86_64' and sys_platform == 'linux' nvidia-nvtx-cu12==12.6.77 ; platform_machine == 'x86_64' and sys_platform == 'linux' -openai==1.91.0 +openai==1.99.9 opentelemetry-api==1.34.1 opentelemetry-exporter-otlp==1.34.1 opentelemetry-exporter-otlp-proto-common==1.34.1 diff --git a/uv.lock b/uv.lock index 3f8281e..f7fb389 100644 --- a/uv.lock +++ b/uv.lock @@ -108,6 +108,7 @@ dependencies = [ { name = "faiss-cpu" }, { name = "fire" }, { name = "lightspeed-stack-providers" }, + { name = "litellm" }, { name = "mcp" }, { name = "numpy" }, { name = "opentelemetry-api" }, @@ -128,6 +129,7 @@ requires-dist = [ { name = "faiss-cpu", specifier = "~=1.11.0" }, { name = "fire", specifier = "~=0.7.0" }, { name = "lightspeed-stack-providers", specifier = "==0.1.14" }, + { name = 
"litellm", specifier = "~=1.75.3" }, { name = "mcp", specifier = "~=1.9.4" }, { name = "numpy", specifier = "==2.2.6" }, { name = "opentelemetry-api", specifier = "~=1.34.1" }, @@ -805,6 +807,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0d/ae/d768eca3eaebe81ec63f4490ae485899008a0412270eb1b5ef469196d464/lightspeed_stack_providers-0.1.14-py3-none-any.whl", hash = "sha256:723037d571eb3fad082e680f82e176bb4d4f0cf47e0a865237849b3ef4c1ba2a", size = 25010, upload-time = "2025-08-13T09:07:54.264Z" }, ] +[[package]] +name = "litellm" +version = "1.75.5.post1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohttp" }, + { name = "click" }, + { name = "httpx" }, + { name = "importlib-metadata" }, + { name = "jinja2" }, + { name = "jsonschema" }, + { name = "openai" }, + { name = "pydantic" }, + { name = "python-dotenv" }, + { name = "tiktoken" }, + { name = "tokenizers" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/10/97/6091a020895102a20f1da204ebe68c1293123555476b38e749f95ba5981c/litellm-1.75.5.post1.tar.gz", hash = "sha256:e40a0e4b25032755dc5df7f02742abe9e3b8836236363f605f3bdd363cb5a0d0", size = 10127846, upload-time = "2025-08-10T16:30:23.788Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/76/780f68a3b26227136a5147c76860aacedcae9bf1b7afc1c991ec9cad11bc/litellm-1.75.5.post1-py3-none-any.whl", hash = "sha256:1c72809a9c8f6e132ad06eb7e628f674c775b0ce6bfb58cbd37e8b585d929cb7", size = 8895997, upload-time = "2025-08-10T16:30:21.325Z" }, +] + [[package]] name = "llama-api-client" version = "0.1.2" @@ -1227,7 +1251,7 @@ wheels = [ [[package]] name = "openai" -version = "1.91.0" +version = "1.99.9" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1239,9 +1263,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/0f/e2/a22f2973b729eff3f1f429017bdf717930c5de0fbf9e14017bae330e4e7a/openai-1.91.0.tar.gz", hash = "sha256:d6b07730d2f7c6745d0991997c16f85cddfc90ddcde8d569c862c30716b9fc90", size = 472529, upload-time = "2025-06-23T18:27:10.961Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8a/d2/ef89c6f3f36b13b06e271d3cc984ddd2f62508a0972c1cbcc8485a6644ff/openai-1.99.9.tar.gz", hash = "sha256:f2082d155b1ad22e83247c3de3958eb4255b20ccf4a1de2e6681b6957b554e92", size = 506992, upload-time = "2025-08-12T02:31:10.054Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7a/d2/f99bdd6fc737d6b3cf0df895508d621fc9a386b375a1230ee81d46c5436e/openai-1.91.0-py3-none-any.whl", hash = "sha256:207f87aa3bc49365e014fac2f7e291b99929f4fe126c4654143440e0ad446a5f", size = 735837, upload-time = "2025-06-23T18:27:08.913Z" }, + { url = "https://files.pythonhosted.org/packages/e8/fb/df274ca10698ee77b07bff952f302ea627cc12dac6b85289485dd77db6de/openai-1.99.9-py3-none-any.whl", hash = "sha256:9dbcdb425553bae1ac5d947147bebbd630d91bbfc7788394d4c4f3a35682ab3a", size = 786816, upload-time = "2025-08-12T02:31:08.34Z" }, ] [[package]]