File tree Expand file tree Collapse file tree 7 files changed +64
-4
lines changed Expand file tree Collapse file tree 7 files changed +64
-4
lines changed Original file line number Diff line number Diff line change @@ -124,6 +124,7 @@ run: check-env-run
124124	  --env VLLM_API_TOKEN=$(ANSIBLE_CHATBOT_VLLM_API_TOKEN )  \ 
125125	  --env INFERENCE_MODEL=$(ANSIBLE_CHATBOT_INFERENCE_MODEL )  \ 
126126	  --env INFERENCE_MODEL_FILTER=$(ANSIBLE_CHATBOT_INFERENCE_MODEL_FILTER )  \ 
127+ 	  --env GEMINI_API_KEY=$(GEMINI_API_KEY )  \ 
127128	  ansible-chatbot-stack:$(ANSIBLE_CHATBOT_VERSION ) 
128129
129130run-test :
@@ -162,6 +163,7 @@ run-local-db: check-env-run-local-db
162163	  --env VLLM_API_TOKEN=$(ANSIBLE_CHATBOT_VLLM_API_TOKEN )  \ 
163164	  --env INFERENCE_MODEL=$(ANSIBLE_CHATBOT_INFERENCE_MODEL )  \ 
164165	  --env INFERENCE_MODEL_FILTER=$(ANSIBLE_CHATBOT_INFERENCE_MODEL_FILTER )  \ 
166+ 	  --env GEMINI_API_KEY=$(GEMINI_API_KEY )  \ 
165167	  ansible-chatbot-stack:$(ANSIBLE_CHATBOT_VERSION ) 
166168
167169clean :
Original file line number Diff line number Diff line change @@ -154,6 +154,20 @@ Runs basic tests against the local container.
154154    kubectl apply -f my-chatbot-stack-deploy.yaml
155155``` 
156156
157+ ## Appendix - Google Gemini  
158+ 
*  Please set the environment variable ` GEMINI_API_KEY=<YOUR_API_KEY> ` 
160+ *  Example of a ` v1/query `  request:
161+ ``` json 
162+ {
163+     "query": "hello",
164+     "system_prompt": "You are a helpful assistant.",
165+     "model": "gemini/gemini-2.5-flash",
166+     "provider": "gemini"
167+ }
168+ ``` 
169+ 
170+ 
157171## Appendix - Host clean-up  
158172
159173If you have the need for re-building images, apply the following clean-ups right before:
Original file line number Diff line number Diff line change @@ -18,6 +18,10 @@ providers:
1818      max_tokens : ${env.VLLM_MAX_TOKENS:=4096} 
1919      api_token : ${env.VLLM_API_TOKEN:=fake} 
2020      tls_verify : ${env.VLLM_TLS_VERIFY:=true} 
21+   - provider_id : gemini 
22+     provider_type : remote::gemini 
23+     config :
24+       api_key : ${env.GEMINI_API_KEY:=fake} 
2125  - provider_id : inline_sentence-transformer 
2226    provider_type : inline::sentence-transformers 
2327    config : {} 
@@ -85,6 +89,11 @@ models:
8589  model_id : ${env.EMBEDDINGS_MODEL:=/.llama/data/distributions/ansible-chatbot/embeddings_model} 
8690  provider_id : inline_sentence-transformer 
8791  model_type : embedding 
92+ - metadata : {} 
93+   model_id : ${env.GEMINI_INFERENCE_MODEL:=gemini/gemini-2.5-flash} 
94+   provider_id : gemini 
95+   provider_model_id : gemini/gemini-2.5-flash 
96+   model_type : llm 
8897shields : [] 
8998vector_dbs :
9099- metadata : {} 
Original file line number Diff line number Diff line change @@ -18,6 +18,10 @@ providers:
1818      max_tokens : ${env.VLLM_MAX_TOKENS:=4096} 
1919      api_token : ${env.VLLM_API_TOKEN:=fake} 
2020      tls_verify : ${env.VLLM_TLS_VERIFY:=true} 
21+   - provider_id : gemini 
22+     provider_type : remote::gemini 
23+     config :
24+       api_key : ${env.GEMINI_API_KEY:=fake} 
2125  - provider_id : inline_sentence-transformer 
2226    provider_type : inline::sentence-transformers 
2327    config : {} 
@@ -85,6 +89,11 @@ models:
8589  model_id : ${env.EMBEDDINGS_MODEL:=/.llama/data/distributions/ansible-chatbot/embeddings_model} 
8690  provider_id : inline_sentence-transformer 
8791  model_type : embedding 
92+ - metadata : {} 
93+   model_id : ${env.GEMINI_INFERENCE_MODEL:=gemini/gemini-2.5-flash} 
94+   provider_id : gemini 
95+   provider_model_id : gemini/gemini-2.5-flash 
96+   model_type : llm 
8897shields : [] 
8998vector_dbs :
9099- metadata : {} 
Original file line number Diff line number Diff line change @@ -15,6 +15,7 @@ dependencies = [
1515    " opentelemetry-exporter-otlp~=1.34.1" 
1616    " sentence-transformers>=5.0.0" 
1717    " sqlalchemy~=2.0.41" 
18+     " litellm~=1.75.3" 
1819]
1920
2021[dependency-groups ]
Original file line number Diff line number Diff line change @@ -37,6 +37,7 @@ joblib==1.5.1
3737jsonschema == 4.24.0 
3838jsonschema-specifications == 2025.4.1 
3939lightspeed-stack-providers == 0.1.14 
40+ litellm == 1.75.5.post1 
4041llama-api-client == 0.1.2 
4142llama-stack == 0.2.16 
4243llama-stack-client == 0.2.16 
@@ -62,7 +63,7 @@ nvidia-cusparselt-cu12==0.6.3 ; platform_machine == 'x86_64' and sys_platform ==
6263nvidia-nccl-cu12 == 2.26.2  ; platform_machine  ==  'x86_64'  and sys_platform == 'linux' 
6364nvidia-nvjitlink-cu12 == 12.6.85  ; platform_machine  ==  'x86_64'  and sys_platform == 'linux' 
6465nvidia-nvtx-cu12 == 12.6.77  ; platform_machine  ==  'x86_64'  and sys_platform == 'linux' 
65- openai == 1.91.0  
66+ openai == 1.99.9  
6667opentelemetry-api == 1.34.1 
6768opentelemetry-exporter-otlp == 1.34.1 
6869opentelemetry-exporter-otlp-proto-common == 1.34.1 
 
 
   
 
     
   
   
          
    
    
     
    
      
     
     
    You can’t perform that action at this time.
  
 
    
  
    
      
        
     
       
      
     
   
 
    
    
  
 
  
 
     
    
0 commit comments