diff --git a/docs/my-website/docs/interactions.md b/docs/my-website/docs/interactions.md index 32c82a1589c8..8014bf05367a 100644 --- a/docs/my-website/docs/interactions.md +++ b/docs/my-website/docs/interactions.md @@ -130,13 +130,12 @@ Point the Google GenAI SDK to LiteLLM Proxy: ```python showLineNumbers title="Google GenAI SDK with LiteLLM Proxy" from google import genai -import os # Point SDK to LiteLLM Proxy -os.environ["GOOGLE_GENAI_BASE_URL"] = "http://localhost:4000" -os.environ["GEMINI_API_KEY"] = "sk-1234" # Your LiteLLM API key - -client = genai.Client() +client = genai.Client( + api_key="sk-1234", # Your LiteLLM API key + http_options={"base_url": "http://localhost:4000"}, +) # Create an interaction interaction = client.interactions.create( @@ -151,12 +150,11 @@ print(interaction.outputs[-1].text) ```python showLineNumbers title="Google GenAI SDK Streaming" from google import genai -import os - -os.environ["GOOGLE_GENAI_BASE_URL"] = "http://localhost:4000" -os.environ["GEMINI_API_KEY"] = "sk-1234" -client = genai.Client() +client = genai.Client( + api_key="sk-1234", # Your LiteLLM API key + http_options={"base_url": "http://localhost:4000"}, +) for chunk in client.interactions.create_stream( model="gemini/gemini-2.5-flash", diff --git a/docs/my-website/docs/pass_through/google_ai_studio.md b/docs/my-website/docs/pass_through/google_ai_studio.md index 3de7c54aa7a2..d87c17fa7eea 100644 --- a/docs/my-website/docs/pass_through/google_ai_studio.md +++ b/docs/my-website/docs/pass_through/google_ai_studio.md @@ -35,26 +35,25 @@ curl 'http://0.0.0.0:4000/gemini/v1beta/models/gemini-1.5-flash:countTokens?key= ``` - + ```javascript -const { GoogleGenerativeAI } = require("@google/generative-ai"); - -const modelParams = { - model: 'gemini-pro', -}; - -const requestOptions = { - baseUrl: 'http://localhost:4000/gemini', // http:///gemini -}; - -const genAI = new GoogleGenerativeAI("sk-1234"); // litellm proxy API key -const model = 
genAI.getGenerativeModel(modelParams, requestOptions); +const { GoogleGenAI } = require("@google/genai"); + +const ai = new GoogleGenAI({ + apiKey: "sk-1234", // litellm proxy API key + httpOptions: { + baseUrl: "http://localhost:4000/gemini", // http://<proxy-base-url>/gemini + }, +}); async function main() { try { - const result = await model.generateContent("Explain how AI works"); - console.log(result.response.text()); + const response = await ai.models.generateContent({ + model: "gemini-2.5-flash", + contents: "Explain how AI works", + }); + console.log(response.text); } catch (error) { console.error('Error:', error); } @@ -63,12 +62,13 @@ async function main() { // For streaming responses async function main_streaming() { try { - const streamingResult = await model.generateContentStream("Explain how AI works"); - for await (const chunk of streamingResult.stream) { - console.log('Stream chunk:', JSON.stringify(chunk)); + const response = await ai.models.generateContentStream({ + model: "gemini-2.5-flash", + contents: "Explain how AI works", + }); + for await (const chunk of response) { + process.stdout.write(chunk.text); } - const aggregatedResponse = await streamingResult.response; - console.log('Aggregated response:', JSON.stringify(aggregatedResponse)); } catch (error) { console.error('Error:', error); } @@ -321,29 +321,28 @@ curl 'http://0.0.0.0:4000/gemini/v1beta/models/gemini-1.5-flash:generateContent? 
``` - + ```javascript -const { GoogleGenerativeAI } = require("@google/generative-ai"); - -const modelParams = { - model: 'gemini-pro', -}; - -const requestOptions = { - baseUrl: 'http://localhost:4000/gemini', // http://<proxy-base-url>/gemini - customHeaders: { - "tags": "gemini-js-sdk,pass-through-endpoint" - } -}; - -const genAI = new GoogleGenerativeAI("sk-1234"); -const model = genAI.getGenerativeModel(modelParams, requestOptions); +const { GoogleGenAI } = require("@google/genai"); + +const ai = new GoogleGenAI({ + apiKey: "sk-1234", + httpOptions: { + baseUrl: "http://localhost:4000/gemini", // http://<proxy-base-url>/gemini + headers: { + "tags": "gemini-js-sdk,pass-through-endpoint", + }, + }, +}); async function main() { try { - const result = await model.generateContent("Explain how AI works"); - console.log(result.response.text()); + const response = await ai.models.generateContent({ + model: "gemini-2.5-flash", + contents: "Explain how AI works", + }); + console.log(response.text); } catch (error) { console.error('Error:', error); } diff --git a/docs/my-website/docs/tutorials/google_genai_sdk.md b/docs/my-website/docs/tutorials/google_genai_sdk.md new file mode 100644 index 000000000000..b0538795c4d3 --- /dev/null +++ b/docs/my-website/docs/tutorials/google_genai_sdk.md @@ -0,0 +1,406 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Google GenAI SDK with LiteLLM + +Use Google's official GenAI SDK (JavaScript/TypeScript and Python) with any LLM provider through LiteLLM Proxy. + +The Google GenAI SDK (`@google/genai` for JS, `google-genai` for Python) provides a native interface for calling Gemini models. By pointing it to LiteLLM, you can use the same SDK with OpenAI, Anthropic, Bedrock, Azure, Vertex AI, or any other provider — while keeping the native Gemini request/response format. + +## Why Use LiteLLM with Google GenAI SDK? 
+ +**Developer Benefits:** +- **Universal Model Access**: Use any LiteLLM-supported model (Anthropic, OpenAI, Vertex AI, Bedrock, etc.) through the Google GenAI SDK interface +- **Higher Rate Limits & Reliability**: Load balance across multiple models and providers to avoid hitting individual provider limits, with fallbacks to ensure you get responses even if one provider fails + +**Proxy Admin Benefits:** +- **Centralized Management**: Control access to all models through a single LiteLLM proxy instance without giving developers API keys to each provider +- **Budget Controls**: Set spending limits and track costs across all SDK usage +- **Logging & Observability**: Track all requests with cost tracking, logging, and analytics + +| Feature | Supported | Notes | +|---------|-----------|-------| +| Cost Tracking | ✅ | All models on `/generateContent` endpoint | +| Logging | ✅ | Works across all integrations | +| Streaming | ✅ | `streamGenerateContent` supported | +| Virtual Keys | ✅ | Use LiteLLM keys instead of Google keys | +| Load Balancing | ✅ | Via native router endpoints | +| Fallbacks | ✅ | Via native router endpoints | + +## Quick Start + +### 1. Install the SDK + + + + +```bash +npm install @google/genai +``` + + + + +```bash +pip install google-genai +``` + + + + +### 2. Start LiteLLM Proxy + +```yaml title="config.yaml" showLineNumbers +model_list: + - model_name: gemini-2.5-flash + litellm_params: + model: gemini/gemini-2.5-flash + api_key: os.environ/GEMINI_API_KEY +``` + +```bash +litellm --config config.yaml +``` + +### 3. 
Call the SDK through LiteLLM + + + + +```javascript title="index.js" showLineNumbers +const { GoogleGenAI } = require("@google/genai"); + +const ai = new GoogleGenAI({ + apiKey: "sk-1234", // LiteLLM virtual key (not a Google key) + httpOptions: { + baseUrl: "http://localhost:4000/gemini", // LiteLLM proxy URL + }, +}); + +async function main() { + const response = await ai.models.generateContent({ + model: "gemini-2.5-flash", + contents: "Explain how AI works", + }); + console.log(response.text); +} + +main(); +``` + + + + +```python title="main.py" showLineNumbers +from google import genai + +client = genai.Client( + api_key="sk-1234", # LiteLLM virtual key (not a Google key) + http_options={"base_url": "http://localhost:4000/gemini"}, # LiteLLM proxy URL +) + +response = client.models.generate_content( + model="gemini-2.5-flash", + contents="Explain how AI works", +) +print(response.text) +``` + + + + +```bash +curl "http://localhost:4000/gemini/v1beta/models/gemini-2.5-flash:generateContent?key=sk-1234" \ + -H 'Content-Type: application/json' \ + -X POST \ + -d '{ + "contents": [{ + "parts": [{"text": "Explain how AI works"}] + }] + }' +``` + + + + +## Streaming + + + + +```javascript title="streaming.js" showLineNumbers +const { GoogleGenAI } = require("@google/genai"); + +const ai = new GoogleGenAI({ + apiKey: "sk-1234", + httpOptions: { + baseUrl: "http://localhost:4000/gemini", + }, +}); + +async function main() { + const response = await ai.models.generateContentStream({ + model: "gemini-2.5-flash", + contents: "Write a short poem about the ocean", + }); + + for await (const chunk of response) { + process.stdout.write(chunk.text); + } +} + +main(); +``` + + + + +```python title="streaming.py" showLineNumbers +from google import genai + +client = genai.Client( + api_key="sk-1234", + http_options={"base_url": "http://localhost:4000/gemini"}, +) + +response = client.models.generate_content_stream( + model="gemini-2.5-flash", + contents="Write a short poem 
about the ocean", +) + +for chunk in response: + print(chunk.text, end="") +``` + + + + +## Multi-turn Chat + + + + +```javascript title="chat.js" showLineNumbers +const { GoogleGenAI } = require("@google/genai"); + +const ai = new GoogleGenAI({ + apiKey: "sk-1234", + httpOptions: { + baseUrl: "http://localhost:4000/gemini", + }, +}); + +async function main() { + const chat = ai.chats.create({ + model: "gemini-2.5-flash", + }); + + const response1 = await chat.sendMessage({ message: "I have 2 dogs and 3 cats." }); + console.log(response1.text); + + const response2 = await chat.sendMessage({ message: "How many pets is that in total?" }); + console.log(response2.text); +} + +main(); +``` + + + + +```python title="chat.py" showLineNumbers +from google import genai + +client = genai.Client( + api_key="sk-1234", + http_options={"base_url": "http://localhost:4000/gemini"}, +) + +chat = client.chats.create(model="gemini-2.5-flash") + +response1 = chat.send_message("I have 2 dogs and 3 cats.") +print(response1.text) + +response2 = chat.send_message("How many pets is that in total?") +print(response2.text) +``` + + + + + +## Advanced: Use Any Model with the GenAI SDK + +By default, the GenAI SDK talks to Gemini models. But with LiteLLM's router, you can route GenAI SDK requests to **any provider** — Anthropic, OpenAI, Bedrock, etc. + +This works by using `model_group_alias` to map Gemini model names to your desired provider models. LiteLLM handles the format translation internally. + +:::info + +For this to work, point the SDK `baseUrl` to `http://localhost:4000` (without `/gemini`). This routes requests through LiteLLM's native Google endpoints, which go through the router and support model aliasing. 
+ +::: + + + + +Route `gemini-2.5-flash` requests to Claude Sonnet: + +```yaml title="config.yaml" showLineNumbers +model_list: + - model_name: claude-sonnet + litellm_params: + model: anthropic/claude-sonnet-4-20250514 + api_key: os.environ/ANTHROPIC_API_KEY + +router_settings: + model_group_alias: {"gemini-2.5-flash": "claude-sonnet"} +``` + + + + +Route `gemini-2.5-flash` requests to GPT-4o: + +```yaml title="config.yaml" showLineNumbers +model_list: + - model_name: gpt-4o-model + litellm_params: + model: gpt-4o + api_key: os.environ/OPENAI_API_KEY + +router_settings: + model_group_alias: {"gemini-2.5-flash": "gpt-4o-model"} +``` + + + + +Route `gemini-2.5-flash` requests to Claude on Bedrock: + +```yaml title="config.yaml" showLineNumbers +model_list: + - model_name: bedrock-claude + litellm_params: + model: bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0 + aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID + aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY + aws_region_name: us-east-1 + +router_settings: + model_group_alias: {"gemini-2.5-flash": "bedrock-claude"} +``` + + + + +Load balance across Anthropic and OpenAI: + +```yaml title="config.yaml" showLineNumbers +model_list: + - model_name: my-model + litellm_params: + model: anthropic/claude-sonnet-4-20250514 + api_key: os.environ/ANTHROPIC_API_KEY + - model_name: my-model + litellm_params: + model: gpt-4o + api_key: os.environ/OPENAI_API_KEY + +router_settings: + model_group_alias: {"gemini-2.5-flash": "my-model"} +``` + + + + +Then use the SDK with `baseUrl` pointing to LiteLLM (without `/gemini`): + + + + +```javascript title="any_model.js" showLineNumbers +const { GoogleGenAI } = require("@google/genai"); + +const ai = new GoogleGenAI({ + apiKey: "sk-1234", + httpOptions: { + baseUrl: "http://localhost:4000", // No /gemini — goes through the router + }, +}); + +async function main() { + // This calls Claude/GPT-4o/Bedrock under the hood via model_group_alias + const response = await 
ai.models.generateContent({ + model: "gemini-2.5-flash", + contents: "Hello from any model!", + }); + console.log(response.text); +} + +main(); +``` + + + + +```python title="any_model.py" showLineNumbers +from google import genai + +client = genai.Client( + api_key="sk-1234", + http_options={"base_url": "http://localhost:4000"}, # No /gemini +) + +# This calls Claude/GPT-4o/Bedrock under the hood via model_group_alias +response = client.models.generate_content( + model="gemini-2.5-flash", + contents="Hello from any model!", +) +print(response.text) +``` + + + + + +## Pass-through vs Native Router Endpoints + +LiteLLM offers two ways to handle GenAI SDK requests: + +| | Pass-through (`/gemini`) | Native Router (`/`) | +|---|---|---| +| **baseUrl** | `http://localhost:4000/gemini` | `http://localhost:4000` | +| **Models** | Gemini only | Any provider via `model_group_alias` | +| **Translation** | None — proxies directly to Google | Translates internally | +| **Cost Tracking** | ✅ | ✅ | +| **Virtual Keys** | ✅ | ✅ | +| **Load Balancing** | ❌ | ✅ | +| **Fallbacks** | ❌ | ✅ | +| **Best for** | Simple Gemini proxy | Multi-provider routing | + +## Environment Variable Configuration + +You can also configure the SDK via environment variables instead of code: + +```bash +# For JavaScript SDK (@google/genai) +export GOOGLE_GEMINI_BASE_URL="http://localhost:4000/gemini" +export GEMINI_API_KEY="sk-1234" + +# For Python SDK (google-genai) +# Note: The Python SDK does not support a base URL env var. +# Configure it in code with http_options={"base_url": "..."} instead. +export GEMINI_API_KEY="sk-1234" +``` + +This is especially useful for tools built on top of the GenAI SDK (like [Gemini CLI](./litellm_gemini_cli.md)). 
+ +## Related Resources + +- [Gemini CLI with LiteLLM](./litellm_gemini_cli.md) +- [Google AI Studio Pass-Through](../pass_through/google_ai_studio) +- [Google ADK with LiteLLM](./google_adk.md) +- [LiteLLM Proxy Quick Start](../proxy/quick_start) +- [`@google/genai` npm package](https://www.npmjs.com/package/@google/genai) +- [`google-genai` PyPI package](https://pypi.org/project/google-genai/) diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 1d43484fd1b6..f3ed78298837 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -165,6 +165,7 @@ const sidebars = { "tutorials/cursor_integration", "tutorials/github_copilot_integration", "tutorials/litellm_gemini_cli", + "tutorials/google_genai_sdk", "tutorials/litellm_qwen_code_cli", "tutorials/openai_codex" ]