Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
369 changes: 166 additions & 203 deletions model_prices_and_context_window.json
Original file line number Diff line number Diff line change
Expand Up @@ -27224,208 +27224,6 @@
"max_tokens": 128000,
"mode": "chat"
},
"ovhcloud/DeepSeek-R1-Distill-Llama-70B": {
"input_cost_per_token": 6.7e-07,
"litellm_provider": "ovhcloud",
"max_input_tokens": 131000,
"max_output_tokens": 131000,
"max_tokens": 131000,
"mode": "chat",
"output_cost_per_token": 6.7e-07,
"source": "https://endpoints.ai.cloud.ovh.net/models/deepseek-r1-distill-llama-70b",
"supports_function_calling": true,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_tool_choice": true
},
"ovhcloud/Llama-3.1-8B-Instruct": {
"input_cost_per_token": 1e-07,
"litellm_provider": "ovhcloud",
"max_input_tokens": 131000,
"max_output_tokens": 131000,
"max_tokens": 131000,
"mode": "chat",
"output_cost_per_token": 1e-07,
"source": "https://endpoints.ai.cloud.ovh.net/models/llama-3-1-8b-instruct",
"supports_function_calling": true,
"supports_response_schema": true,
"supports_tool_choice": true
},
"ovhcloud/Meta-Llama-3_1-70B-Instruct": {
"input_cost_per_token": 6.7e-07,
"litellm_provider": "ovhcloud",
"max_input_tokens": 131000,
"max_output_tokens": 131000,
"max_tokens": 131000,
"mode": "chat",
"output_cost_per_token": 6.7e-07,
"source": "https://endpoints.ai.cloud.ovh.net/models/meta-llama-3-1-70b-instruct",
"supports_function_calling": false,
"supports_response_schema": false,
"supports_tool_choice": false
},
"ovhcloud/Meta-Llama-3_3-70B-Instruct": {
"input_cost_per_token": 6.7e-07,
"litellm_provider": "ovhcloud",
"max_input_tokens": 131000,
"max_output_tokens": 131000,
"max_tokens": 131000,
"mode": "chat",
"output_cost_per_token": 6.7e-07,
"source": "https://endpoints.ai.cloud.ovh.net/models/meta-llama-3-3-70b-instruct",
"supports_function_calling": true,
"supports_response_schema": true,
"supports_tool_choice": true
},
"ovhcloud/Mistral-7B-Instruct-v0.3": {
"input_cost_per_token": 1e-07,
"litellm_provider": "ovhcloud",
"max_input_tokens": 127000,
"max_output_tokens": 127000,
"max_tokens": 127000,
"mode": "chat",
"output_cost_per_token": 1e-07,
"source": "https://endpoints.ai.cloud.ovh.net/models/mistral-7b-instruct-v0-3",
"supports_function_calling": true,
"supports_response_schema": true,
"supports_tool_choice": true
},
"ovhcloud/Mistral-Nemo-Instruct-2407": {
"input_cost_per_token": 1.3e-07,
"litellm_provider": "ovhcloud",
"max_input_tokens": 118000,
"max_output_tokens": 118000,
"max_tokens": 118000,
"mode": "chat",
"output_cost_per_token": 1.3e-07,
"source": "https://endpoints.ai.cloud.ovh.net/models/mistral-nemo-instruct-2407",
"supports_function_calling": true,
"supports_response_schema": true,
"supports_tool_choice": true
},
"ovhcloud/Mistral-Small-3.2-24B-Instruct-2506": {
"input_cost_per_token": 9e-08,
"litellm_provider": "ovhcloud",
"max_input_tokens": 128000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "chat",
"output_cost_per_token": 2.8e-07,
"source": "https://endpoints.ai.cloud.ovh.net/models/mistral-small-3-2-24b-instruct-2506",
"supports_function_calling": true,
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true
},
"ovhcloud/Mixtral-8x7B-Instruct-v0.1": {
"input_cost_per_token": 6.3e-07,
"litellm_provider": "ovhcloud",
"max_input_tokens": 32000,
"max_output_tokens": 32000,
"max_tokens": 32000,
"mode": "chat",
"output_cost_per_token": 6.3e-07,
"source": "https://endpoints.ai.cloud.ovh.net/models/mixtral-8x7b-instruct-v0-1",
"supports_function_calling": false,
"supports_response_schema": true,
"supports_tool_choice": false
},
"ovhcloud/Qwen2.5-Coder-32B-Instruct": {
"input_cost_per_token": 8.7e-07,
"litellm_provider": "ovhcloud",
"max_input_tokens": 32000,
"max_output_tokens": 32000,
"max_tokens": 32000,
"mode": "chat",
"output_cost_per_token": 8.7e-07,
"source": "https://endpoints.ai.cloud.ovh.net/models/qwen2-5-coder-32b-instruct",
"supports_function_calling": false,
"supports_response_schema": true,
"supports_tool_choice": false
},
"ovhcloud/Qwen2.5-VL-72B-Instruct": {
"input_cost_per_token": 9.1e-07,
"litellm_provider": "ovhcloud",
"max_input_tokens": 32000,
"max_output_tokens": 32000,
"max_tokens": 32000,
"mode": "chat",
"output_cost_per_token": 9.1e-07,
"source": "https://endpoints.ai.cloud.ovh.net/models/qwen2-5-vl-72b-instruct",
"supports_function_calling": false,
"supports_response_schema": true,
"supports_tool_choice": false,
"supports_vision": true
},
"ovhcloud/Qwen3-32B": {
"input_cost_per_token": 8e-08,
"litellm_provider": "ovhcloud",
"max_input_tokens": 32000,
"max_output_tokens": 32000,
"max_tokens": 32000,
"mode": "chat",
"output_cost_per_token": 2.3e-07,
"source": "https://endpoints.ai.cloud.ovh.net/models/qwen3-32b",
"supports_function_calling": true,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_tool_choice": true
},
"ovhcloud/gpt-oss-120b": {
"input_cost_per_token": 8e-08,
"litellm_provider": "ovhcloud",
"max_input_tokens": 131000,
"max_output_tokens": 131000,
"max_tokens": 131000,
"mode": "chat",
"output_cost_per_token": 4e-07,
"source": "https://endpoints.ai.cloud.ovh.net/models/gpt-oss-120b",
"supports_function_calling": false,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_tool_choice": false
},
"ovhcloud/gpt-oss-20b": {
"input_cost_per_token": 4e-08,
"litellm_provider": "ovhcloud",
"max_input_tokens": 131000,
"max_output_tokens": 131000,
"max_tokens": 131000,
"mode": "chat",
"output_cost_per_token": 1.5e-07,
"source": "https://endpoints.ai.cloud.ovh.net/models/gpt-oss-20b",
"supports_function_calling": false,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_tool_choice": false
},
"ovhcloud/llava-v1.6-mistral-7b-hf": {
"input_cost_per_token": 2.9e-07,
"litellm_provider": "ovhcloud",
"max_input_tokens": 32000,
"max_output_tokens": 32000,
"max_tokens": 32000,
"mode": "chat",
"output_cost_per_token": 2.9e-07,
"source": "https://endpoints.ai.cloud.ovh.net/models/llava-next-mistral-7b",
"supports_function_calling": false,
"supports_response_schema": true,
"supports_tool_choice": false,
"supports_vision": true
},
"ovhcloud/mamba-codestral-7B-v0.1": {
"input_cost_per_token": 1.9e-07,
"litellm_provider": "ovhcloud",
"max_input_tokens": 256000,
"max_output_tokens": 256000,
"max_tokens": 256000,
"mode": "chat",
"output_cost_per_token": 1.9e-07,
"source": "https://endpoints.ai.cloud.ovh.net/models/mamba-codestral-7b-v0-1",
"supports_function_calling": false,
"supports_response_schema": true,
"supports_tool_choice": false
},
"palm/chat-bison": {
"input_cost_per_token": 1.25e-07,
"litellm_provider": "palm",
Expand Down Expand Up @@ -39203,5 +39001,170 @@
"supports_reasoning": true,
"supports_response_schema": true,
"supports_tool_choice": true
},
"ovhcloud/Qwen3Guard-Gen-8B": {
"litellm_provider": "ovhcloud",
"mode": "chat",
"max_tokens": 32768,
"max_input_tokens": 32768,
"max_output_tokens": 32768
},
"ovhcloud/Qwen3Guard-Gen-0.6B": {
"litellm_provider": "ovhcloud",
"mode": "chat",
"max_tokens": 32768,
"max_input_tokens": 32768,
"max_output_tokens": 32768
},
"ovhcloud/Meta-Llama-3_3-70B-Instruct": {
"litellm_provider": "ovhcloud",
"mode": "chat",
"max_tokens": 131072,
"max_input_tokens": 131072,
"max_output_tokens": 131072,
"input_cost_per_token": 7.4e-07,
"output_cost_per_token": 7.4e-07,
"supports_function_calling": true,
Comment on lines +39016 to +39027
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

source field dropped from all updated models

Every previously-existing OVHcloud entry had a source URL (e.g. "source": "https://endpoints.ai.cloud.ovh.net/models/...") that linked to the endpoint documentation. All twelve updated model entries in this PR have removed that field entirely. This reduces discoverability and makes it harder for users and maintainers to verify pricing and capability information.

Please restore the source field on all updated entries. For example, ovhcloud/Meta-Llama-3_3-70B-Instruct had:

"source": "https://endpoints.ai.cloud.ovh.net/models/meta-llama-3-3-70b-instruct"

This pattern applies to all twelve re-declared models: Meta-Llama-3_3-70B-Instruct (shown above) plus Llama-3.1-8B-Instruct, Qwen2.5-VL-72B-Instruct, Qwen3-Coder-30B-A3B-Instruct, Mistral-Small-3.2-24B-Instruct-2506, Mistral-Nemo-Instruct-2407, Mixtral-8x7B-Instruct-v0.1, Qwen3-32B, DeepSeek-R1-Distill-Llama-70B, gpt-oss-20b, Mistral-7B-Instruct-v0.3, and gpt-oss-120b.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it's not necessary.

"supports_response_schema": true,
"supports_tool_choice": true
},
"ovhcloud/Llama-3.1-8B-Instruct": {
"litellm_provider": "ovhcloud",
"mode": "chat",
"max_tokens": 131072,
"max_input_tokens": 131072,
"max_output_tokens": 131072,
"input_cost_per_token": 1.1e-07,
"output_cost_per_token": 1.1e-07,
"supports_function_calling": true,
"supports_response_schema": true,
"supports_tool_choice": true
},
"ovhcloud/Qwen2.5-VL-72B-Instruct": {
"litellm_provider": "ovhcloud",
"mode": "chat",
"max_tokens": 32768,
"max_input_tokens": 32768,
"max_output_tokens": 32768,
"input_cost_per_token": 1.01e-06,
"output_cost_per_token": 1.01e-06,
"supports_function_calling": false,
"supports_response_schema": true,
"supports_tool_choice": false,
"supports_vision": true
},
"ovhcloud/Qwen3-Coder-30B-A3B-Instruct": {
"litellm_provider": "ovhcloud",
"mode": "chat",
"max_tokens": 262144,
"max_input_tokens": 262144,
"max_output_tokens": 262144,
"input_cost_per_token": 7e-08,
"output_cost_per_token": 2.6e-07,
"supports_function_calling": true,
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_reasoning": false
},
"ovhcloud/Mistral-Small-3.2-24B-Instruct-2506": {
"litellm_provider": "ovhcloud",
"mode": "chat",
"max_tokens": 131072,
"max_input_tokens": 131072,
"max_output_tokens": 131072,
"input_cost_per_token": 1e-07,
"output_cost_per_token": 3.1e-07,
"supports_function_calling": true,
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true
},
"ovhcloud/Mistral-Nemo-Instruct-2407": {
"litellm_provider": "ovhcloud",
"mode": "chat",
Comment on lines +39082 to +39084
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Context window reduced for Mistral-Nemo-Instruct-2407 and Mistral-7B-Instruct-v0.3

Two models have had their context windows significantly reduced compared to the previous entries:

  • ovhcloud/Mistral-Nemo-Instruct-2407: 118 000 → 65 536 tokens (≈ 45% reduction)
  • ovhcloud/Mistral-7B-Instruct-v0.3 (line ~39141): 127 000 → 65 536 tokens (≈ 48% reduction)

Users who currently pass prompts between 65 K and 118 K / 127 K tokens to these models will start receiving context-limit errors after this change is deployed. Per the repo's guideline against backwards-incompatible changes without user-controlled flags, this should be documented in the PR description if it reflects a real OVHcloud endpoint limit change, or reverted if it does not.

Context Used: Rule from dashboard - What: avoid backwards-incompatible changes without user-controlled flags

Why: This breaks current ... (source)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it's normal.

"max_tokens": 65536,
"max_input_tokens": 65536,
"max_output_tokens": 65536,
"input_cost_per_token": 1.4e-07,
"output_cost_per_token": 1.4e-07,
"supports_function_calling": true,
"supports_response_schema": true,
"supports_tool_choice": true
},
"ovhcloud/Mixtral-8x7B-Instruct-v0.1": {
"litellm_provider": "ovhcloud",
"mode": "chat",
"max_tokens": 32768,
"max_input_tokens": 32768,
"max_output_tokens": 32768,
"input_cost_per_token": 7e-07,
"output_cost_per_token": 7e-07,
"supports_function_calling": false,
"supports_response_schema": true,
"supports_tool_choice": false
},
"ovhcloud/Qwen3-32B": {
"litellm_provider": "ovhcloud",
"mode": "chat",
"max_tokens": 32768,
"max_input_tokens": 32768,
"max_output_tokens": 32768,
"input_cost_per_token": 9e-08,
"output_cost_per_token": 2.5e-07,
"supports_function_calling": true,
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_reasoning": true
},
"ovhcloud/DeepSeek-R1-Distill-Llama-70B": {
"litellm_provider": "ovhcloud",
"mode": "chat",
"max_tokens": 131072,
"max_input_tokens": 131072,
"max_output_tokens": 131072,
"input_cost_per_token": 7.4e-07,
"output_cost_per_token": 7.4e-07,
"supports_function_calling": true,
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_reasoning": true
},
"ovhcloud/gpt-oss-20b": {
"litellm_provider": "ovhcloud",
"mode": "chat",
"max_tokens": 131072,
"max_input_tokens": 131072,
"max_output_tokens": 131072,
"input_cost_per_token": 5e-08,
"output_cost_per_token": 1.8e-07,
"supports_function_calling": true,
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_reasoning": true
},
"ovhcloud/Mistral-7B-Instruct-v0.3": {
"litellm_provider": "ovhcloud",
"mode": "chat",
"max_tokens": 65536,
"max_input_tokens": 65536,
"max_output_tokens": 65536,
"input_cost_per_token": 1.1e-07,
"output_cost_per_token": 1.1e-07,
"supports_function_calling": true,
"supports_response_schema": true,
"supports_tool_choice": true
},
"ovhcloud/gpt-oss-120b": {
"litellm_provider": "ovhcloud",
"mode": "chat",
"max_tokens": 131072,
"max_input_tokens": 131072,
"max_output_tokens": 131072,
"input_cost_per_token": 9e-08,
"output_cost_per_token": 4.7e-07,
"supports_function_calling": true,
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_reasoning": true
}
}
}
Loading