diff --git a/docs/my-website/docs/providers/gmi.md b/docs/my-website/docs/providers/gmi.md new file mode 100644 index 0000000000..8e32146323 --- /dev/null +++ b/docs/my-website/docs/providers/gmi.md @@ -0,0 +1,140 @@ +# GMI Cloud + +## Overview + +| Property | Details | +|-------|-------| +| Description | GMI Cloud is a GPU cloud infrastructure provider offering access to top AI models including Claude, GPT, DeepSeek, Gemini, and more through OpenAI-compatible APIs. | +| Provider Route on LiteLLM | `gmi/` | +| Link to Provider Doc | [GMI Cloud Docs ↗](https://docs.gmicloud.ai) | +| Base URL | `https://api.gmi-serving.com/v1` | +| Supported Operations | [`/chat/completions`](#sample-usage), [`/models`](#supported-models) | + +
+ +## What is GMI Cloud? + +GMI Cloud is a venture-backed digital infrastructure company ($82M+ funding) providing: +- **Top-tier GPU Access**: NVIDIA H100 GPUs for AI workloads +- **Multiple AI Models**: Claude, GPT, DeepSeek, Gemini, Kimi, Qwen, and more +- **OpenAI-Compatible API**: Drop-in replacement for OpenAI SDK +- **Global Infrastructure**: Data centers in US (Colorado) and APAC (Taiwan) + +## Required Variables + +```python showLineNumbers title="Environment Variables" +os.environ["GMI_API_KEY"] = "" # your GMI Cloud API key +``` + +Get your GMI Cloud API key from [console.gmicloud.ai](https://console.gmicloud.ai). + +## Usage - LiteLLM Python SDK + +### Non-streaming + +```python showLineNumbers title="GMI Cloud Non-streaming Completion" +import os +import litellm +from litellm import completion + +os.environ["GMI_API_KEY"] = "" # your GMI Cloud API key + +messages = [{"content": "What is the capital of France?", "role": "user"}] + +# GMI Cloud call +response = completion( + model="gmi/deepseek-ai/DeepSeek-V3.2", + messages=messages +) + +print(response) +``` + +### Streaming + +```python showLineNumbers title="GMI Cloud Streaming Completion" +import os +import litellm +from litellm import completion + +os.environ["GMI_API_KEY"] = "" # your GMI Cloud API key + +messages = [{"content": "Write a short poem about AI", "role": "user"}] + +# GMI Cloud call with streaming +response = completion( + model="gmi/anthropic/claude-sonnet-4.5", + messages=messages, + stream=True +) + +for chunk in response: + print(chunk) +``` + +## Usage - LiteLLM Proxy Server + +### 1. Save key in your environment + +```bash +export GMI_API_KEY="" +``` + +### 2. 
Add models to your config.yaml and start the proxy + +```yaml +model_list: + - model_name: deepseek-v3 + litellm_params: + model: gmi/deepseek-ai/DeepSeek-V3.2 + api_key: os.environ/GMI_API_KEY + - model_name: claude-sonnet + litellm_params: + model: gmi/anthropic/claude-sonnet-4.5 + api_key: os.environ/GMI_API_KEY +``` + +## Supported Models + +| Model | Model ID | Context Length | +|-------|----------|----------------| +| Claude Opus 4.5 | `gmi/anthropic/claude-opus-4.5` | 409K | +| Claude Sonnet 4.5 | `gmi/anthropic/claude-sonnet-4.5` | 409K | +| Claude Sonnet 4 | `gmi/anthropic/claude-sonnet-4` | 409K | +| Claude Opus 4 | `gmi/anthropic/claude-opus-4` | 409K | +| GPT-5.2 | `gmi/openai/gpt-5.2` | 409K | +| GPT-5.1 | `gmi/openai/gpt-5.1` | 409K | +| GPT-5 | `gmi/openai/gpt-5` | 409K | +| GPT-4o | `gmi/openai/gpt-4o` | 131K | +| GPT-4o-mini | `gmi/openai/gpt-4o-mini` | 131K | +| DeepSeek V3.2 | `gmi/deepseek-ai/DeepSeek-V3.2` | 163K | +| DeepSeek V3 0324 | `gmi/deepseek-ai/DeepSeek-V3-0324` | 163K | +| Gemini 3 Pro | `gmi/google/gemini-3-pro-preview` | 1M | +| Gemini 3 Flash | `gmi/google/gemini-3-flash-preview` | 1M | +| Kimi K2 Thinking | `gmi/moonshotai/Kimi-K2-Thinking` | 262K | +| MiniMax M2.1 | `gmi/MiniMaxAI/MiniMax-M2.1` | 196K | +| Qwen3-VL 235B | `gmi/Qwen/Qwen3-VL-235B-A22B-Instruct-FP8` | 262K | +| GLM-4.7 | `gmi/zai-org/GLM-4.7-FP8` | 202K | + +## Supported OpenAI Parameters + +GMI Cloud supports all standard OpenAI-compatible parameters: + +| Parameter | Type | Description | +|-----------|------|-------------| +| `messages` | array | **Required**. Array of message objects with 'role' and 'content' | +| `model` | string | **Required**. Model ID from available models | +| `stream` | boolean | Optional. Enable streaming responses | +| `temperature` | float | Optional. Sampling temperature | +| `top_p` | float | Optional. Nucleus sampling parameter | +| `max_tokens` | integer | Optional. Maximum tokens to generate | +| `frequency_penalty` | float | Optional. 
Penalize frequent tokens | +| `presence_penalty` | float | Optional. Penalize tokens based on presence | +| `stop` | string/array | Optional. Stop sequences | +| `response_format` | object | Optional. JSON mode with `{"type": "json_object"}` | + +## Additional Resources + +- [GMI Cloud Website](https://www.gmicloud.ai) +- [GMI Cloud Documentation](https://docs.gmicloud.ai) +- [GMI Cloud Console](https://console.gmicloud.ai) diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 1a8a53774c..d2af237141 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -719,6 +719,7 @@ const sidebars = { "providers/galadriel", "providers/github", "providers/github_copilot", + "providers/gmi", "providers/chatgpt", "providers/gradient_ai", "providers/groq", diff --git a/litellm/llms/openai_like/providers.json b/litellm/llms/openai_like/providers.json index bda3684a8a..4aefe58394 100644 --- a/litellm/llms/openai_like/providers.json +++ b/litellm/llms/openai_like/providers.json @@ -71,5 +71,9 @@ "param_mappings": { "max_completion_tokens": "max_tokens" } + }, + "gmi": { + "base_url": "https://api.gmi-serving.com/v1", + "api_key_env": "GMI_API_KEY" } } diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 135b0d46ed..d35f9b80ce 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -16094,6 +16094,181 @@ "output_cost_per_token": 0.0, "output_vector_size": 2560 }, + "gmi/anthropic/claude-opus-4.5": { + "input_cost_per_token": 5e-06, + "litellm_provider": "gmi", + "max_input_tokens": 409600, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "supports_function_calling": true, + "supports_vision": true + }, + "gmi/anthropic/claude-sonnet-4.5": { + "input_cost_per_token": 3e-06, + "litellm_provider": "gmi", + "max_input_tokens": 409600, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", 
+ "output_cost_per_token": 1.5e-05, + "supports_function_calling": true, + "supports_vision": true + }, + "gmi/anthropic/claude-sonnet-4": { + "input_cost_per_token": 3e-06, + "litellm_provider": "gmi", + "max_input_tokens": 409600, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_function_calling": true, + "supports_vision": true + }, + "gmi/anthropic/claude-opus-4": { + "input_cost_per_token": 1.5e-05, + "litellm_provider": "gmi", + "max_input_tokens": 409600, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "supports_function_calling": true, + "supports_vision": true + }, + "gmi/openai/gpt-5.2": { + "input_cost_per_token": 1.75e-06, + "litellm_provider": "gmi", + "max_input_tokens": 409600, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 1.4e-05, + "supports_function_calling": true + }, + "gmi/openai/gpt-5.1": { + "input_cost_per_token": 1.25e-06, + "litellm_provider": "gmi", + "max_input_tokens": 409600, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true + }, + "gmi/openai/gpt-5": { + "input_cost_per_token": 1.25e-06, + "litellm_provider": "gmi", + "max_input_tokens": 409600, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true + }, + "gmi/openai/gpt-4o": { + "input_cost_per_token": 2.5e-06, + "litellm_provider": "gmi", + "max_input_tokens": 131072, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true, + "supports_vision": true + }, + "gmi/openai/gpt-4o-mini": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "gmi", + "max_input_tokens": 131072, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", 
+ "output_cost_per_token": 6e-07, + "supports_function_calling": true, + "supports_vision": true + }, + "gmi/deepseek-ai/DeepSeek-V3.2": { + "input_cost_per_token": 2.8e-07, + "litellm_provider": "gmi", + "max_input_tokens": 163840, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 4e-07, + "supports_function_calling": true + }, + "gmi/deepseek-ai/DeepSeek-V3-0324": { + "input_cost_per_token": 2.8e-07, + "litellm_provider": "gmi", + "max_input_tokens": 163840, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 8.8e-07, + "supports_function_calling": true + }, + "gmi/google/gemini-3-pro-preview": { + "input_cost_per_token": 2e-06, + "litellm_provider": "gmi", + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 1.2e-05, + "supports_function_calling": true, + "supports_vision": true + }, + "gmi/google/gemini-3-flash-preview": { + "input_cost_per_token": 5e-07, + "litellm_provider": "gmi", + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 3e-06, + "supports_function_calling": true, + "supports_vision": true + }, + "gmi/moonshotai/Kimi-K2-Thinking": { + "input_cost_per_token": 8e-07, + "litellm_provider": "gmi", + "max_input_tokens": 262144, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.2e-06 + }, + "gmi/MiniMaxAI/MiniMax-M2.1": { + "input_cost_per_token": 3e-07, + "litellm_provider": "gmi", + "max_input_tokens": 196608, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.2e-06 + }, + "gmi/Qwen/Qwen3-VL-235B-A22B-Instruct-FP8": { + "input_cost_per_token": 3e-07, + "litellm_provider": "gmi", + "max_input_tokens": 262144, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 
1.4e-06, + "supports_vision": true + }, + "gmi/zai-org/GLM-4.7-FP8": { + "input_cost_per_token": 4e-07, + "litellm_provider": "gmi", + "max_input_tokens": 202752, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 2e-06 + }, "google.gemma-3-12b-it": { "input_cost_per_token": 9e-08, "litellm_provider": "bedrock_converse", diff --git a/provider_endpoints_support.json b/provider_endpoints_support.json index 441e27af98..343d5bd2c6 100644 --- a/provider_endpoints_support.json +++ b/provider_endpoints_support.json @@ -972,6 +972,24 @@ "interactions": true } }, + "gmi": { + "display_name": "GMI Cloud (`gmi`)", + "url": "https://docs.litellm.ai/docs/providers/gmi", + "endpoints": { + "chat_completions": true, + "messages": true, + "responses": true, + "embeddings": false, + "image_generations": false, + "audio_transcriptions": false, + "audio_speech": false, + "moderations": false, + "batches": false, + "rerank": false, + "a2a": true, + "interactions": true + } + }, "vertex_ai": { "display_name": "Google - Vertex AI (`vertex_ai`)", "url": "https://docs.litellm.ai/docs/providers/vertex",