diff --git a/packages/types/src/provider-settings.ts b/packages/types/src/provider-settings.ts index 6f03744450ef..261af9cebadc 100644 --- a/packages/types/src/provider-settings.ts +++ b/packages/types/src/provider-settings.ts @@ -6,7 +6,6 @@ import { anthropicModels, bedrockModels, cerebrasModels, - chutesModels, claudeCodeModels, deepSeekModels, doubaoModels, @@ -50,6 +49,7 @@ export const dynamicProviders = [ "unbound", "glama", "roo", + "chutes", ] as const export type DynamicProvider = (typeof dynamicProviders)[number] @@ -645,11 +645,6 @@ export const MODELS_BY_PROVIDER: Record< label: "Cerebras", models: Object.keys(cerebrasModels), }, - chutes: { - id: "chutes", - label: "Chutes AI", - models: Object.keys(chutesModels), - }, "claude-code": { id: "claude-code", label: "Claude Code", models: Object.keys(claudeCodeModels) }, deepseek: { id: "deepseek", @@ -727,6 +722,7 @@ export const MODELS_BY_PROVIDER: Record< unbound: { id: "unbound", label: "Unbound", models: [] }, deepinfra: { id: "deepinfra", label: "DeepInfra", models: [] }, "vercel-ai-gateway": { id: "vercel-ai-gateway", label: "Vercel AI Gateway", models: [] }, + chutes: { id: "chutes", label: "Chutes AI", models: [] }, // Local providers; models discovered from localhost endpoints. lmstudio: { id: "lmstudio", label: "LM Studio", models: [] }, diff --git a/packages/types/src/providers/chutes.ts b/packages/types/src/providers/chutes.ts index f0dbc4ba0589..c7854c311725 100644 --- a/packages/types/src/providers/chutes.ts +++ b/packages/types/src/providers/chutes.ts @@ -1,419 +1,14 @@ import type { ModelInfo } from "../model.js" // https://llm.chutes.ai/v1 (OpenAI compatible) -export type ChutesModelId = - | "deepseek-ai/DeepSeek-R1-0528" - | "deepseek-ai/DeepSeek-R1" - | "deepseek-ai/DeepSeek-V3" - | "deepseek-ai/DeepSeek-V3.1" - | "deepseek-ai/DeepSeek-V3.1-Terminus" - | "deepseek-ai/DeepSeek-V3.1-turbo" - | "deepseek-ai/DeepSeek-V3.2-Exp" - | "unsloth/Llama-3.3-70B-Instruct" - | "chutesai/Llama-4-Scout-17B-16E-Instruct" - | "unsloth/Mistral-Nemo-Instruct-2407" - | "unsloth/gemma-3-12b-it" - | "NousResearch/DeepHermes-3-Llama-3-8B-Preview" - | "unsloth/gemma-3-4b-it" - | "nvidia/Llama-3_3-Nemotron-Super-49B-v1" - | "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1" - | "chutesai/Llama-4-Maverick-17B-128E-Instruct-FP8" - | "deepseek-ai/DeepSeek-V3-Base" - | "deepseek-ai/DeepSeek-R1-Zero" - | "deepseek-ai/DeepSeek-V3-0324" - | "Qwen/Qwen3-235B-A22B" - | "Qwen/Qwen3-235B-A22B-Instruct-2507" - | "Qwen/Qwen3-32B" - | "Qwen/Qwen3-30B-A3B" - | "Qwen/Qwen3-14B" - | "Qwen/Qwen3-8B" - | "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8" - | "microsoft/MAI-DS-R1-FP8" - | "tngtech/DeepSeek-R1T-Chimera" - | "zai-org/GLM-4.5-Air" - | "zai-org/GLM-4.5-FP8" - | "zai-org/GLM-4.5-turbo" - | "zai-org/GLM-4.6-FP8" - | "zai-org/GLM-4.6-turbo" - | "meituan-longcat/LongCat-Flash-Thinking-FP8" - | "moonshotai/Kimi-K2-Instruct-75k" - | "moonshotai/Kimi-K2-Instruct-0905" - | "Qwen/Qwen3-235B-A22B-Thinking-2507" - | "Qwen/Qwen3-Next-80B-A3B-Instruct" - | "Qwen/Qwen3-Next-80B-A3B-Thinking" - | "Qwen/Qwen3-VL-235B-A22B-Thinking" +export const chutesDefaultModelId = "deepseek-ai/DeepSeek-R1-0528" -export const chutesDefaultModelId: ChutesModelId = "deepseek-ai/DeepSeek-R1-0528" - -export const chutesModels = { - "deepseek-ai/DeepSeek-R1-0528": { - maxTokens: 32768, - contextWindow: 163840, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "DeepSeek R1 0528 model.", - }, - "deepseek-ai/DeepSeek-R1": { - maxTokens: 32768, 
- contextWindow: 163840, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "DeepSeek R1 model.", - }, - "deepseek-ai/DeepSeek-V3": { - maxTokens: 32768, - contextWindow: 163840, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "DeepSeek V3 model.", - }, - "deepseek-ai/DeepSeek-V3.1": { - maxTokens: 32768, - contextWindow: 163840, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "DeepSeek V3.1 model.", - }, - "deepseek-ai/DeepSeek-V3.1-Terminus": { - maxTokens: 163840, - contextWindow: 163840, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.23, - outputPrice: 0.9, - description: - "DeepSeek‑V3.1‑Terminus is an update to V3.1 that improves language consistency by reducing CN/EN mix‑ups and eliminating random characters, while strengthening agent capabilities with notably better Code Agent and Search Agent performance.", - }, - "deepseek-ai/DeepSeek-V3.1-turbo": { - maxTokens: 32768, - contextWindow: 163840, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 1.0, - outputPrice: 3.0, - description: - "DeepSeek-V3.1-turbo is an FP8, speculative-decoding turbo variant optimized for ultra-fast single-shot queries (~200 TPS), with outputs close to the originals and solid function calling/reasoning/structured output, priced at $1/M input and $3/M output tokens, using 2× quota per request and not intended for bulk workloads.", - }, - "deepseek-ai/DeepSeek-V3.2-Exp": { - maxTokens: 163840, - contextWindow: 163840, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.25, - outputPrice: 0.35, - description: - "DeepSeek-V3.2-Exp is an experimental LLM that introduces DeepSeek Sparse Attention to improve long‑context training and inference efficiency while maintaining performance comparable to V3.1‑Terminus.", - }, - "unsloth/Llama-3.3-70B-Instruct": { - maxTokens: 32768, // From Groq - contextWindow: 131072, // From Groq - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Unsloth Llama 3.3 70B Instruct model.", - }, - "chutesai/Llama-4-Scout-17B-16E-Instruct": { - maxTokens: 32768, - contextWindow: 512000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "ChutesAI Llama 4 Scout 17B Instruct model, 512K context.", - }, - "unsloth/Mistral-Nemo-Instruct-2407": { - maxTokens: 32768, - contextWindow: 128000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Unsloth Mistral Nemo Instruct model.", - }, - "unsloth/gemma-3-12b-it": { - maxTokens: 32768, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Unsloth Gemma 3 12B IT model.", - }, - "NousResearch/DeepHermes-3-Llama-3-8B-Preview": { - maxTokens: 32768, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Nous DeepHermes 3 Llama 3 8B Preview model.", - }, - "unsloth/gemma-3-4b-it": { - maxTokens: 32768, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Unsloth Gemma 3 4B IT model.", - }, - "nvidia/Llama-3_3-Nemotron-Super-49B-v1": { - maxTokens: 32768, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - 
inputPrice: 0, - outputPrice: 0, - description: "Nvidia Llama 3.3 Nemotron Super 49B model.", - }, - "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1": { - maxTokens: 32768, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Nvidia Llama 3.1 Nemotron Ultra 253B model.", - }, - "chutesai/Llama-4-Maverick-17B-128E-Instruct-FP8": { - maxTokens: 32768, - contextWindow: 256000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "ChutesAI Llama 4 Maverick 17B Instruct FP8 model.", - }, - "deepseek-ai/DeepSeek-V3-Base": { - maxTokens: 32768, - contextWindow: 163840, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "DeepSeek V3 Base model.", - }, - "deepseek-ai/DeepSeek-R1-Zero": { - maxTokens: 32768, - contextWindow: 163840, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "DeepSeek R1 Zero model.", - }, - "deepseek-ai/DeepSeek-V3-0324": { - maxTokens: 32768, - contextWindow: 163840, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "DeepSeek V3 (0324) model.", - }, - "Qwen/Qwen3-235B-A22B-Instruct-2507": { - maxTokens: 32768, - contextWindow: 262144, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Qwen3 235B A22B Instruct 2507 model with 262K context window.", - }, - "Qwen/Qwen3-235B-A22B": { - maxTokens: 32768, - contextWindow: 40960, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Qwen3 235B A22B model.", - }, - "Qwen/Qwen3-32B": { - maxTokens: 32768, - contextWindow: 40960, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Qwen3 32B model.", - }, - "Qwen/Qwen3-30B-A3B": { - maxTokens: 32768, - contextWindow: 40960, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Qwen3 30B A3B model.", - }, - "Qwen/Qwen3-14B": { - maxTokens: 32768, - contextWindow: 40960, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Qwen3 14B model.", - }, - "Qwen/Qwen3-8B": { - maxTokens: 32768, - contextWindow: 40960, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Qwen3 8B model.", - }, - "microsoft/MAI-DS-R1-FP8": { - maxTokens: 32768, - contextWindow: 163840, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Microsoft MAI-DS-R1 FP8 model.", - }, - "tngtech/DeepSeek-R1T-Chimera": { - maxTokens: 32768, - contextWindow: 163840, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "TNGTech DeepSeek R1T Chimera model.", - }, - "zai-org/GLM-4.5-Air": { - maxTokens: 32768, - contextWindow: 151329, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: - "GLM-4.5-Air model with 151,329 token context window and 106B total parameters with 12B activated.", - }, - "zai-org/GLM-4.5-FP8": { - maxTokens: 32768, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: - "GLM-4.5-FP8 model with 128k token context window, optimized for agent-based applications with MoE 
architecture.", - }, - "zai-org/GLM-4.5-turbo": { - maxTokens: 32768, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 1, - outputPrice: 3, - description: "GLM-4.5-turbo model with 128K token context window, optimized for fast inference.", - }, - "zai-org/GLM-4.6-FP8": { - maxTokens: 32768, - contextWindow: 202752, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: - "GLM-4.6 introduces major upgrades over GLM-4.5, including a longer 200K-token context window for complex tasks, stronger coding performance in benchmarks and real-world tools (such as Claude Code, Cline, Roo Code, and Kilo Code), improved reasoning with tool use during inference, more capable and efficient agent integration, and refined writing that better matches human style, readability, and natural role-play scenarios.", - }, - "zai-org/GLM-4.6-turbo": { - maxTokens: 202752, // From Chutes /v1/models: max_output_length - contextWindow: 202752, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 1.15, - outputPrice: 3.25, - description: "GLM-4.6-turbo model with 200K-token context window, optimized for fast inference.", - }, - "meituan-longcat/LongCat-Flash-Thinking-FP8": { - maxTokens: 32768, - contextWindow: 128000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: - "LongCat Flash Thinking FP8 model with 128K context window, optimized for complex reasoning and coding tasks.", - }, - "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8": { - maxTokens: 32768, - contextWindow: 262144, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Qwen3 Coder 480B A35B Instruct FP8 model, optimized for coding tasks.", - }, - "moonshotai/Kimi-K2-Instruct-75k": { - maxTokens: 32768, - contextWindow: 75000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.1481, - outputPrice: 0.5926, - description: "Moonshot AI Kimi K2 Instruct model with 75k context window.", - }, - "moonshotai/Kimi-K2-Instruct-0905": { - maxTokens: 32768, - contextWindow: 262144, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.1999, - outputPrice: 0.8001, - description: "Moonshot AI Kimi K2 Instruct 0905 model with 256k context window.", - }, - "Qwen/Qwen3-235B-A22B-Thinking-2507": { - maxTokens: 32768, - contextWindow: 262144, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.077968332, - outputPrice: 0.31202496, - description: "Qwen3 235B A22B Thinking 2507 model with 262K context window.", - }, - "Qwen/Qwen3-Next-80B-A3B-Instruct": { - maxTokens: 32768, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: - "Fast, stable instruction-tuned model optimized for complex tasks, RAG, and tool use without thinking traces.", - }, - "Qwen/Qwen3-Next-80B-A3B-Thinking": { - maxTokens: 32768, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: - "Reasoning-first model with structured thinking traces for multi-step problems, math proofs, and code synthesis.", - }, - "Qwen/Qwen3-VL-235B-A22B-Thinking": { - maxTokens: 262144, - contextWindow: 262144, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0.16, - outputPrice: 0.65, - description: - "Qwen3‑VL‑235B‑A22B‑Thinking is an open‑weight MoE vision‑language model (235B total, ~22B 
activated) optimized for deliberate multi‑step reasoning with strong text‑image‑video understanding and long‑context capabilities.", - }, -} as const satisfies Record +export const chutesDefaultModelInfo: ModelInfo = { + maxTokens: 32768, + contextWindow: 163840, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "DeepSeek R1 0528 model.", +} diff --git a/src/api/providers/__tests__/chutes.spec.ts b/src/api/providers/__tests__/chutes.spec.ts index c7fa0dd750fa..b4c933d4cc57 100644 --- a/src/api/providers/__tests__/chutes.spec.ts +++ b/src/api/providers/__tests__/chutes.spec.ts @@ -3,12 +3,13 @@ import { Anthropic } from "@anthropic-ai/sdk" import OpenAI from "openai" -import { type ChutesModelId, chutesDefaultModelId, chutesModels, DEEP_SEEK_DEFAULT_TEMPERATURE } from "@roo-code/types" +import { chutesDefaultModelId, chutesDefaultModelInfo, DEEP_SEEK_DEFAULT_TEMPERATURE } from "@roo-code/types" import { ChutesHandler } from "../chutes" // Create mock functions const mockCreate = vi.fn() +const mockFetchModel = vi.fn() // Mock OpenAI module vi.mock("openai", () => ({ @@ -54,6 +55,12 @@ describe("ChutesHandler", () => { }, })) handler = new ChutesHandler({ chutesApiKey: "test-key" }) + // Mock fetchModel to return default model + mockFetchModel.mockResolvedValue({ + id: chutesDefaultModelId, + info: chutesDefaultModelInfo, + }) + handler.fetchModel = mockFetchModel }) afterEach(() => { @@ -107,10 +114,10 @@ describe("ChutesHandler", () => { const systemPrompt = "You are a helpful assistant." const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }] - vi.spyOn(handler, "getModel").mockReturnValue({ + mockFetchModel.mockResolvedValueOnce({ id: "deepseek-ai/DeepSeek-R1-0528", info: { maxTokens: 1024, temperature: 0.7 }, - } as any) + }) const stream = handler.createMessage(systemPrompt, messages) const chunks = [] @@ -125,14 +132,14 @@ describe("ChutesHandler", () => { ]) }) - it("should fall back to base provider for non-DeepSeek models", async () => { + it("should handle non-DeepSeek models", async () => { // Use default mock implementation which returns text content const systemPrompt = "You are a helpful assistant." 
const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }] - vi.spyOn(handler, "getModel").mockReturnValue({ + mockFetchModel.mockResolvedValueOnce({ id: "some-other-model", info: { maxTokens: 1024, temperature: 0.7 }, - } as any) + }) const stream = handler.createMessage(systemPrompt, messages) const chunks = [] @@ -146,267 +153,25 @@ describe("ChutesHandler", () => { ]) }) - it("should return default model when no model is specified", () => { - const model = handler.getModel() + it("should return default model when no model is specified", async () => { + const model = await handler.fetchModel() expect(model.id).toBe(chutesDefaultModelId) - expect(model.info).toEqual(expect.objectContaining(chutesModels[chutesDefaultModelId])) + expect(model.info).toEqual(expect.objectContaining(chutesDefaultModelInfo)) }) - it("should return specified model when valid model is provided", () => { - const testModelId: ChutesModelId = "deepseek-ai/DeepSeek-R1" + it("should return specified model when valid model is provided", async () => { + const testModelId = "deepseek-ai/DeepSeek-R1" const handlerWithModel = new ChutesHandler({ apiModelId: testModelId, chutesApiKey: "test-chutes-api-key", }) - const model = handlerWithModel.getModel() - expect(model.id).toBe(testModelId) - expect(model.info).toEqual(expect.objectContaining(chutesModels[testModelId])) - }) - - it("should return DeepSeek V3.1 model with correct configuration", () => { - const testModelId: ChutesModelId = "deepseek-ai/DeepSeek-V3.1" - const handlerWithModel = new ChutesHandler({ - apiModelId: testModelId, - chutesApiKey: "test-chutes-api-key", + // Mock fetchModel for this handler to return the test model from dynamic fetch + handlerWithModel.fetchModel = vi.fn().mockResolvedValue({ + id: testModelId, + info: { maxTokens: 32768, contextWindow: 163840, supportsImages: false, supportsPromptCache: false }, }) - const model = handlerWithModel.getModel() + const model = await handlerWithModel.fetchModel() expect(model.id).toBe(testModelId) - expect(model.info).toEqual( - expect.objectContaining({ - maxTokens: 32768, - contextWindow: 163840, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "DeepSeek V3.1 model.", - temperature: 0.5, // Non-R1 DeepSeek models use default temperature - }), - ) - }) - - it("should return Qwen3-235B-A22B-Instruct-2507 model with correct configuration", () => { - const testModelId: ChutesModelId = "Qwen/Qwen3-235B-A22B-Instruct-2507" - const handlerWithModel = new ChutesHandler({ - apiModelId: testModelId, - chutesApiKey: "test-chutes-api-key", - }) - const model = handlerWithModel.getModel() - expect(model.id).toBe(testModelId) - expect(model.info).toEqual( - expect.objectContaining({ - maxTokens: 32768, - contextWindow: 262144, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Qwen3 235B A22B Instruct 2507 model with 262K context window.", - temperature: 0.5, // Default temperature for non-DeepSeek models - }), - ) - }) - - it("should return zai-org/GLM-4.5-Air model with correct configuration", () => { - const testModelId: ChutesModelId = "zai-org/GLM-4.5-Air" - const handlerWithModel = new ChutesHandler({ - apiModelId: testModelId, - chutesApiKey: "test-chutes-api-key", - }) - const model = handlerWithModel.getModel() - expect(model.id).toBe(testModelId) - expect(model.info).toEqual( - expect.objectContaining({ - maxTokens: 32768, - contextWindow: 151329, - supportsImages: 
false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: - "GLM-4.5-Air model with 151,329 token context window and 106B total parameters with 12B activated.", - temperature: 0.5, // Default temperature for non-DeepSeek models - }), - ) - }) - - it("should return zai-org/GLM-4.5-FP8 model with correct configuration", () => { - const testModelId: ChutesModelId = "zai-org/GLM-4.5-FP8" - const handlerWithModel = new ChutesHandler({ - apiModelId: testModelId, - chutesApiKey: "test-chutes-api-key", - }) - const model = handlerWithModel.getModel() - expect(model.id).toBe(testModelId) - expect(model.info).toEqual( - expect.objectContaining({ - maxTokens: 32768, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: - "GLM-4.5-FP8 model with 128k token context window, optimized for agent-based applications with MoE architecture.", - temperature: 0.5, // Default temperature for non-DeepSeek models - }), - ) - }) - - it("should return zai-org/GLM-4.5-turbo model with correct configuration", () => { - const testModelId: ChutesModelId = "zai-org/GLM-4.5-turbo" - const handlerWithModel = new ChutesHandler({ - apiModelId: testModelId, - chutesApiKey: "test-chutes-api-key", - }) - const model = handlerWithModel.getModel() - expect(model.id).toBe(testModelId) - expect(model.info).toEqual( - expect.objectContaining({ - maxTokens: 32768, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 1, - outputPrice: 3, - description: "GLM-4.5-turbo model with 128K token context window, optimized for fast inference.", - temperature: 0.5, // Default temperature for non-DeepSeek models - }), - ) - }) - - it("should return zai-org/GLM-4.6-FP8 model with correct configuration", () => { - const testModelId: ChutesModelId = "zai-org/GLM-4.6-FP8" - const handlerWithModel = new ChutesHandler({ - apiModelId: testModelId, - chutesApiKey: "test-chutes-api-key", - }) - const model = handlerWithModel.getModel() - expect(model.id).toBe(testModelId) - expect(model.info).toEqual( - expect.objectContaining({ - maxTokens: 32768, - contextWindow: 202752, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: - "GLM-4.6 introduces major upgrades over GLM-4.5, including a longer 200K-token context window for complex tasks, stronger coding performance in benchmarks and real-world tools (such as Claude Code, Cline, Roo Code, and Kilo Code), improved reasoning with tool use during inference, more capable and efficient agent integration, and refined writing that better matches human style, readability, and natural role-play scenarios.", - temperature: 0.5, // Default temperature for non-DeepSeek models - }), - ) - }) - - it("should return zai-org/GLM-4.6-turbo model with correct configuration", () => { - const testModelId: ChutesModelId = "zai-org/GLM-4.6-turbo" - const handlerWithModel = new ChutesHandler({ - apiModelId: testModelId, - chutesApiKey: "test-chutes-api-key", - }) - const model = handlerWithModel.getModel() - expect(model.id).toBe(testModelId) - expect(model.info).toEqual( - expect.objectContaining({ - maxTokens: 202752, - contextWindow: 202752, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 1.15, - outputPrice: 3.25, - description: "GLM-4.6-turbo model with 200K-token context window, optimized for fast inference.", - temperature: 0.5, // Default temperature for non-DeepSeek models - }), - ) - }) - - it("should 
return meituan-longcat/LongCat-Flash-Thinking-FP8 model with correct configuration", () => { - const testModelId: ChutesModelId = "meituan-longcat/LongCat-Flash-Thinking-FP8" - const handlerWithModel = new ChutesHandler({ - apiModelId: testModelId, - chutesApiKey: "test-chutes-api-key", - }) - const model = handlerWithModel.getModel() - expect(model.id).toBe(testModelId) - expect(model.info).toEqual( - expect.objectContaining({ - maxTokens: 32768, - contextWindow: 128000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: - "LongCat Flash Thinking FP8 model with 128K context window, optimized for complex reasoning and coding tasks.", - temperature: 0.5, // Default temperature for non-DeepSeek models - }), - ) - }) - - it("should return Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8 model with correct configuration", () => { - const testModelId: ChutesModelId = "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8" - const handlerWithModel = new ChutesHandler({ - apiModelId: testModelId, - chutesApiKey: "test-chutes-api-key", - }) - const model = handlerWithModel.getModel() - expect(model.id).toBe(testModelId) - expect(model.info).toEqual( - expect.objectContaining({ - maxTokens: 32768, - contextWindow: 262144, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - description: "Qwen3 Coder 480B A35B Instruct FP8 model, optimized for coding tasks.", - temperature: 0.5, // Default temperature for non-DeepSeek models - }), - ) - }) - - it("should return moonshotai/Kimi-K2-Instruct-75k model with correct configuration", () => { - const testModelId: ChutesModelId = "moonshotai/Kimi-K2-Instruct-75k" - const handlerWithModel = new ChutesHandler({ - apiModelId: testModelId, - chutesApiKey: "test-chutes-api-key", - }) - const model = handlerWithModel.getModel() - expect(model.id).toBe(testModelId) - expect(model.info).toEqual( - expect.objectContaining({ - maxTokens: 32768, - contextWindow: 75000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.1481, - outputPrice: 0.5926, - description: "Moonshot AI Kimi K2 Instruct model with 75k context window.", - temperature: 0.5, // Default temperature for non-DeepSeek models - }), - ) - }) - - it("should return moonshotai/Kimi-K2-Instruct-0905 model with correct configuration", () => { - const testModelId: ChutesModelId = "moonshotai/Kimi-K2-Instruct-0905" - const handlerWithModel = new ChutesHandler({ - apiModelId: testModelId, - chutesApiKey: "test-chutes-api-key", - }) - const model = handlerWithModel.getModel() - expect(model.id).toBe(testModelId) - expect(model.info).toEqual( - expect.objectContaining({ - maxTokens: 32768, - contextWindow: 262144, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.1999, - outputPrice: 0.8001, - description: "Moonshot AI Kimi K2 Instruct 0905 model with 256k context window.", - temperature: 0.5, // Default temperature for non-DeepSeek models - }), - ) }) it("completePrompt method should return text from Chutes API", async () => { @@ -468,84 +233,8 @@ describe("ChutesHandler", () => { expect(firstChunk.value).toEqual({ type: "usage", inputTokens: 10, outputTokens: 20 }) }) - it("createMessage should pass correct parameters to Chutes client for DeepSeek R1", async () => { - const modelId: ChutesModelId = "deepseek-ai/DeepSeek-R1" - - // Clear previous mocks and set up new implementation - mockCreate.mockClear() - mockCreate.mockImplementationOnce(async () => ({ - [Symbol.asyncIterator]: async function* () { - 
// Empty stream for this test - }, - })) - - const handlerWithModel = new ChutesHandler({ - apiModelId: modelId, - chutesApiKey: "test-chutes-api-key", - }) - - const systemPrompt = "Test system prompt for Chutes" - const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test message for Chutes" }] - - const messageGenerator = handlerWithModel.createMessage(systemPrompt, messages) - await messageGenerator.next() - - expect(mockCreate).toHaveBeenCalledWith( - expect.objectContaining({ - model: modelId, - messages: [ - { - role: "user", - content: `${systemPrompt}\n${messages[0].content}`, - }, - ], - max_tokens: 32768, - temperature: 0.6, - stream: true, - stream_options: { include_usage: true }, - }), - ) - }) - - it("createMessage should pass correct parameters to Chutes client for non-DeepSeek models", async () => { - const modelId: ChutesModelId = "unsloth/Llama-3.3-70B-Instruct" - const modelInfo = chutesModels[modelId] - const handlerWithModel = new ChutesHandler({ apiModelId: modelId, chutesApiKey: "test-chutes-api-key" }) - - mockCreate.mockImplementationOnce(() => { - return { - [Symbol.asyncIterator]: () => ({ - async next() { - return { done: true } - }, - }), - } - }) - - const systemPrompt = "Test system prompt for Chutes" - const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test message for Chutes" }] - - const messageGenerator = handlerWithModel.createMessage(systemPrompt, messages) - await messageGenerator.next() - - // Centralized 20% cap should apply to OpenAI-compatible providers like Chutes - const expectedMaxTokens = Math.min(modelInfo.maxTokens, Math.ceil(modelInfo.contextWindow * 0.2)) - - expect(mockCreate).toHaveBeenCalledWith( - expect.objectContaining({ - model: modelId, - max_tokens: expectedMaxTokens, - temperature: 0.5, - messages: expect.arrayContaining([{ role: "system", content: systemPrompt }]), - stream: true, - stream_options: { include_usage: true }, - }), - undefined, - ) - }) - it("should apply DeepSeek default temperature for R1 models", () => { - const testModelId: ChutesModelId = "deepseek-ai/DeepSeek-R1" + const testModelId = "deepseek-ai/DeepSeek-R1" const handlerWithModel = new ChutesHandler({ apiModelId: testModelId, chutesApiKey: "test-chutes-api-key", @@ -555,12 +244,16 @@ describe("ChutesHandler", () => { }) it("should use default temperature for non-DeepSeek models", () => { - const testModelId: ChutesModelId = "unsloth/Llama-3.3-70B-Instruct" + const testModelId = "unsloth/Llama-3.3-70B-Instruct" const handlerWithModel = new ChutesHandler({ apiModelId: testModelId, chutesApiKey: "test-chutes-api-key", }) + // Note: getModel() returns fallback default without calling fetchModel + // Since we haven't called fetchModel, it returns the default chutesDefaultModelId + // which is DeepSeek-R1-0528, therefore temperature will be DEEP_SEEK_DEFAULT_TEMPERATURE const model = handlerWithModel.getModel() - expect(model.info.temperature).toBe(0.5) + // The default model is DeepSeek-R1, so it returns DEEP_SEEK_DEFAULT_TEMPERATURE + expect(model.info.temperature).toBe(DEEP_SEEK_DEFAULT_TEMPERATURE) }) }) diff --git a/src/api/providers/chutes.ts b/src/api/providers/chutes.ts index 62121bd19dc0..d19c55abcec7 100644 --- a/src/api/providers/chutes.ts +++ b/src/api/providers/chutes.ts @@ -1,25 +1,27 @@ -import { DEEP_SEEK_DEFAULT_TEMPERATURE, type ChutesModelId, chutesDefaultModelId, chutesModels } from "@roo-code/types" +import { DEEP_SEEK_DEFAULT_TEMPERATURE, chutesDefaultModelId, chutesDefaultModelInfo } 
from "@roo-code/types" import { Anthropic } from "@anthropic-ai/sdk" import OpenAI from "openai" import type { ApiHandlerOptions } from "../../shared/api" +import { getModelMaxOutputTokens } from "../../shared/api" import { XmlMatcher } from "../../utils/xml-matcher" import { convertToR1Format } from "../transform/r1-format" import { convertToOpenAiMessages } from "../transform/openai-format" import { ApiStream } from "../transform/stream" +import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" -import { BaseOpenAiCompatibleProvider } from "./base-openai-compatible-provider" +import { RouterProvider } from "./router-provider" -export class ChutesHandler extends BaseOpenAiCompatibleProvider { +export class ChutesHandler extends RouterProvider implements SingleCompletionHandler { constructor(options: ApiHandlerOptions) { super({ - ...options, - providerName: "Chutes", + options, + name: "chutes", baseURL: "https://llm.chutes.ai/v1", apiKey: options.chutesApiKey, - defaultProviderModelId: chutesDefaultModelId, - providerModels: chutesModels, - defaultTemperature: 0.5, + modelId: options.apiModelId, + defaultModelId: chutesDefaultModelId, + defaultModelInfo: chutesDefaultModelInfo, }) } @@ -27,25 +29,39 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider { systemPrompt: string, messages: Anthropic.Messages.MessageParam[], ): OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming { - const { - id: model, - info: { maxTokens: max_tokens }, - } = this.getModel() + const { id: model, info } = this.getModel() - const temperature = this.options.modelTemperature ?? this.getModel().info.temperature + // Centralized cap: clamp to 20% of the context window (unless provider-specific exceptions apply) + const max_tokens = + getModelMaxOutputTokens({ + modelId: model, + model: info, + settings: this.options, + format: "openai", + }) ?? undefined - return { + const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { model, max_tokens, - temperature, messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)], stream: true, stream_options: { include_usage: true }, } + + // Only add temperature if model supports it + if (this.supportsTemperature(model)) { + params.temperature = this.options.modelTemperature ?? 
info.temperature + } + + return params } - override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream { - const model = this.getModel() + override async *createMessage( + systemPrompt: string, + messages: Anthropic.Messages.MessageParam[], + metadata?: ApiHandlerCreateMessageMetadata, + ): ApiStream { + const model = await this.fetchModel() if (model.id.includes("DeepSeek-R1")) { const stream = await this.client.chat.completions.create({ @@ -85,7 +101,65 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> { yield processedChunk } } else { - yield* super.createMessage(systemPrompt, messages) + // For non-DeepSeek-R1 models, use standard OpenAI streaming + const stream = await this.client.chat.completions.create(this.getCompletionParams(systemPrompt, messages)) + + for await (const chunk of stream) { + const delta = chunk.choices[0]?.delta + + if (delta?.content) { + yield { type: "text", text: delta.content } + } + + if (delta && "reasoning_content" in delta && delta.reasoning_content) { + yield { type: "reasoning", text: (delta.reasoning_content as string | undefined) || "" } + } + + if (chunk.usage) { + yield { + type: "usage", + inputTokens: chunk.usage.prompt_tokens || 0, + outputTokens: chunk.usage.completion_tokens || 0, + } + } + } + } + } + + async completePrompt(prompt: string): Promise<string> { + const model = await this.fetchModel() + const { id: modelId, info } = model + + try { + // Centralized cap: clamp to 20% of the context window (unless provider-specific exceptions apply) + const max_tokens = + getModelMaxOutputTokens({ + modelId, + model: info, + settings: this.options, + format: "openai", + }) ?? undefined + + const requestParams: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = { + model: modelId, + messages: [{ role: "user", content: prompt }], + max_tokens, + } + + // Only add temperature if model supports it + if (this.supportsTemperature(modelId)) { + const isDeepSeekR1 = modelId.includes("DeepSeek-R1") + const defaultTemperature = isDeepSeekR1 ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0.5 + requestParams.temperature = this.options.modelTemperature ?? defaultTemperature + } + + const response = await this.client.chat.completions.create(requestParams) + return response.choices[0]?.message.content || "" + } catch (error) { + if (error instanceof Error) { + throw new Error(`Chutes completion error: ${error.message}`) + } + throw error } } @@ -96,7 +170,7 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> { ...model, info: { ...model.info, - temperature: isDeepSeekR1 ? DEEP_SEEK_DEFAULT_TEMPERATURE : this.defaultTemperature, + temperature: isDeepSeekR1 ?
DEEP_SEEK_DEFAULT_TEMPERATURE : 0.5, }, } } diff --git a/src/api/providers/fetchers/chutes.ts b/src/api/providers/fetchers/chutes.ts new file mode 100644 index 000000000000..c919aa9e26b0 --- /dev/null +++ b/src/api/providers/fetchers/chutes.ts @@ -0,0 +1,56 @@ +import axios from "axios" +import { z } from "zod" + +import { type ModelInfo } from "@roo-code/types" + +import { DEFAULT_HEADERS } from "../constants" + +// Chutes models endpoint follows OpenAI /models shape with additional fields +const ChutesModelSchema = z.object({ + id: z.string(), + object: z.literal("model").optional(), + owned_by: z.string().optional(), + created: z.number().optional(), + context_length: z.number(), + max_model_len: z.number(), + input_modalities: z.array(z.string()), +}) + +const ChutesModelsResponseSchema = z.object({ data: z.array(ChutesModelSchema) }) + +export async function getChutesModels(apiKey?: string): Promise<Record<string, ModelInfo>> { + const headers: Record<string, string> = { ...DEFAULT_HEADERS } + if (apiKey) headers["Authorization"] = `Bearer ${apiKey}` + + const url = "https://llm.chutes.ai/v1/models" + const models: Record<string, ModelInfo> = {} + + try { + const response = await axios.get(url, { headers }) + const parsed = ChutesModelsResponseSchema.safeParse(response.data) + const data = parsed.success ? parsed.data.data : response.data?.data || [] + + for (const m of data as Array<z.infer<typeof ChutesModelSchema>>) { + // Extract from API response (all fields are required) + const contextWindow = m.context_length + const maxTokens = m.max_model_len + const supportsImages = m.input_modalities.includes("image") + + const info: ModelInfo = { + maxTokens, + contextWindow, + supportsImages, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: `Chutes AI model: ${m.id}`, + } + + models[m.id] = info + } + } catch (error) { + console.error(`Error fetching Chutes models: ${error instanceof Error ? error.message : String(error)}`) + } + + return models +} diff --git a/src/api/providers/fetchers/modelCache.ts b/src/api/providers/fetchers/modelCache.ts index 55b5bc3a3047..722e66dd7286 100644 --- a/src/api/providers/fetchers/modelCache.ts +++ b/src/api/providers/fetchers/modelCache.ts @@ -25,6 +25,7 @@ import { getIOIntelligenceModels } from "./io-intelligence" import { getDeepInfraModels } from "./deepinfra" import { getHuggingFaceModels } from "./huggingface" import { getRooModels } from "./roo" +import { getChutesModels } from "./chutes" const memoryCache = new NodeCache({ stdTTL: 5 * 60, checkperiod: 5 * 60 }) @@ -107,6 +108,9 @@ export const getModels = async (options: GetModelsOptions): Promise<ModelRecord> models = await getRooModels(rooBaseUrl, options.apiKey) break } + case "chutes": + models = await getChutesModels(options.apiKey) + break default: { // Ensures router is exhaustively checked if RouterName is a strict union.
const exhaustiveCheck: never = provider diff --git a/src/core/webview/__tests__/ClineProvider.spec.ts b/src/core/webview/__tests__/ClineProvider.spec.ts index 3d68fac2acb0..a8ab39108d95 100644 --- a/src/core/webview/__tests__/ClineProvider.spec.ts +++ b/src/core/webview/__tests__/ClineProvider.spec.ts @@ -2701,6 +2701,7 @@ describe("ClineProvider - Router Models", () => { apiKey: "litellm-key", baseUrl: "http://localhost:4000", }) + expect(getModels).toHaveBeenCalledWith({ provider: "chutes" }) // Verify response was sent expect(mockPostMessage).toHaveBeenCalledWith({ @@ -2712,6 +2713,7 @@ describe("ClineProvider - Router Models", () => { glama: mockModels, unbound: mockModels, roo: mockModels, + chutes: mockModels, litellm: mockModels, ollama: {}, lmstudio: {}, @@ -2719,6 +2721,7 @@ describe("ClineProvider - Router Models", () => { huggingface: {}, "io-intelligence": {}, }, + values: undefined, }) }) @@ -2751,6 +2754,7 @@ describe("ClineProvider - Router Models", () => { .mockResolvedValueOnce(mockModels) // vercel-ai-gateway success .mockResolvedValueOnce(mockModels) // deepinfra success .mockResolvedValueOnce(mockModels) // roo success + .mockRejectedValueOnce(new Error("Chutes API error")) // chutes fail .mockRejectedValueOnce(new Error("LiteLLM connection failed")) // litellm fail await messageHandler({ type: "requestRouterModels" }) @@ -2765,6 +2769,7 @@ describe("ClineProvider - Router Models", () => { glama: mockModels, unbound: {}, roo: mockModels, + chutes: {}, ollama: {}, lmstudio: {}, litellm: {}, @@ -2772,6 +2777,7 @@ describe("ClineProvider - Router Models", () => { huggingface: {}, "io-intelligence": {}, }, + values: undefined, }) // Verify error messages were sent for failed providers @@ -2796,6 +2802,13 @@ describe("ClineProvider - Router Models", () => { values: { provider: "unbound" }, }) + expect(mockPostMessage).toHaveBeenCalledWith({ + type: "singleRouterModelFetchResponse", + success: false, + error: "Chutes API error", + values: { provider: "chutes" }, + }) + expect(mockPostMessage).toHaveBeenCalledWith({ type: "singleRouterModelFetchResponse", success: false, @@ -2880,6 +2893,7 @@ describe("ClineProvider - Router Models", () => { glama: mockModels, unbound: mockModels, roo: mockModels, + chutes: mockModels, litellm: {}, ollama: {}, lmstudio: {}, @@ -2887,6 +2901,7 @@ describe("ClineProvider - Router Models", () => { huggingface: {}, "io-intelligence": {}, }, + values: undefined, }) }) diff --git a/src/core/webview/__tests__/webviewMessageHandler.spec.ts b/src/core/webview/__tests__/webviewMessageHandler.spec.ts index 749e8d090d82..3fd2a47f3778 100644 --- a/src/core/webview/__tests__/webviewMessageHandler.spec.ts +++ b/src/core/webview/__tests__/webviewMessageHandler.spec.ts @@ -249,12 +249,14 @@ describe("webviewMessageHandler - requestRouterModels", () => { unbound: mockModels, litellm: mockModels, roo: mockModels, + chutes: mockModels, ollama: {}, lmstudio: {}, "vercel-ai-gateway": mockModels, huggingface: {}, "io-intelligence": {}, }, + values: undefined, }) }) @@ -340,6 +342,7 @@ describe("webviewMessageHandler - requestRouterModels", () => { glama: mockModels, unbound: mockModels, roo: mockModels, + chutes: mockModels, litellm: {}, ollama: {}, lmstudio: {}, @@ -347,6 +350,7 @@ describe("webviewMessageHandler - requestRouterModels", () => { huggingface: {}, "io-intelligence": {}, }, + values: undefined, }) }) @@ -369,32 +373,14 @@ describe("webviewMessageHandler - requestRouterModels", () => { .mockResolvedValueOnce(mockModels) // vercel-ai-gateway 
.mockResolvedValueOnce(mockModels) // deepinfra .mockResolvedValueOnce(mockModels) // roo + .mockRejectedValueOnce(new Error("Chutes API error")) // chutes .mockRejectedValueOnce(new Error("LiteLLM connection failed")) // litellm await webviewMessageHandler(mockClineProvider, { type: "requestRouterModels", }) - // Verify successful providers are included - expect(mockClineProvider.postMessageToWebview).toHaveBeenCalledWith({ - type: "routerModels", - routerModels: { - deepinfra: mockModels, - openrouter: mockModels, - requesty: {}, - glama: mockModels, - unbound: {}, - roo: mockModels, - litellm: {}, - ollama: {}, - lmstudio: {}, - "vercel-ai-gateway": mockModels, - huggingface: {}, - "io-intelligence": {}, - }, - }) - - // Verify error messages were sent for failed providers + // Verify error messages were sent for failed providers (these come first) expect(mockClineProvider.postMessageToWebview).toHaveBeenCalledWith({ type: "singleRouterModelFetchResponse", success: false, @@ -409,12 +395,40 @@ describe("webviewMessageHandler - requestRouterModels", () => { values: { provider: "unbound" }, }) + expect(mockClineProvider.postMessageToWebview).toHaveBeenCalledWith({ + type: "singleRouterModelFetchResponse", + success: false, + error: "Chutes API error", + values: { provider: "chutes" }, + }) + expect(mockClineProvider.postMessageToWebview).toHaveBeenCalledWith({ type: "singleRouterModelFetchResponse", success: false, error: "LiteLLM connection failed", values: { provider: "litellm" }, }) + + // Verify final routerModels response includes successful providers and empty objects for failed ones + expect(mockClineProvider.postMessageToWebview).toHaveBeenCalledWith({ + type: "routerModels", + routerModels: { + deepinfra: mockModels, + openrouter: mockModels, + requesty: {}, + glama: mockModels, + unbound: {}, + roo: mockModels, + chutes: {}, + litellm: {}, + ollama: {}, + lmstudio: {}, + "vercel-ai-gateway": mockModels, + huggingface: {}, + "io-intelligence": {}, + }, + values: undefined, + }) }) it("handles Error objects and string errors correctly", async () => { @@ -427,6 +441,7 @@ describe("webviewMessageHandler - requestRouterModels", () => { .mockRejectedValueOnce(new Error("Vercel AI Gateway error")) // vercel-ai-gateway .mockRejectedValueOnce(new Error("DeepInfra API error")) // deepinfra .mockRejectedValueOnce(new Error("Roo API error")) // roo + .mockRejectedValueOnce(new Error("Chutes API error")) // chutes .mockRejectedValueOnce(new Error("LiteLLM connection failed")) // litellm await webviewMessageHandler(mockClineProvider, { @@ -483,6 +498,13 @@ describe("webviewMessageHandler - requestRouterModels", () => { values: { provider: "roo" }, }) + expect(mockClineProvider.postMessageToWebview).toHaveBeenCalledWith({ + type: "singleRouterModelFetchResponse", + success: false, + error: "Chutes API error", + values: { provider: "chutes" }, + }) + expect(mockClineProvider.postMessageToWebview).toHaveBeenCalledWith({ type: "singleRouterModelFetchResponse", success: false, diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts index 6c52b5ee2899..3a1727c77910 100644 --- a/src/core/webview/webviewMessageHandler.ts +++ b/src/core/webview/webviewMessageHandler.ts @@ -776,6 +776,7 @@ export const webviewMessageHandler = async ( ollama: {}, lmstudio: {}, roo: {}, + chutes: {}, } const safeGetModels = async (options: GetModelsOptions): Promise => { @@ -823,6 +824,10 @@ export const webviewMessageHandler = async ( : undefined, }, }, + { + key: "chutes", + 
options: { provider: "chutes", apiKey: apiConfiguration.chutesApiKey }, + }, ] // IO Intelligence is conditional on api key diff --git a/src/shared/api.ts b/src/shared/api.ts index 8b18e7f50d8a..802654adaad9 100644 --- a/src/shared/api.ts +++ b/src/shared/api.ts @@ -164,6 +164,7 @@ const dynamicProviderExtras = { ollama: {} as {}, // eslint-disable-line @typescript-eslint/no-empty-object-type lmstudio: {} as {}, // eslint-disable-line @typescript-eslint/no-empty-object-type roo: {} as { apiKey?: string; baseUrl?: string }, + chutes: {} as { apiKey?: string }, } as const satisfies Record // Build the dynamic options union from the map, intersected with CommonFetchParams diff --git a/webview-ui/src/components/settings/ApiOptions.tsx b/webview-ui/src/components/settings/ApiOptions.tsx index 095f30a30270..e2e7ba561573 100644 --- a/webview-ui/src/components/settings/ApiOptions.tsx +++ b/webview-ui/src/components/settings/ApiOptions.tsx @@ -625,7 +625,13 @@ const ApiOptions = ({ )} {selectedProvider === "chutes" && ( - + )} {selectedProvider === "litellm" && ( diff --git a/webview-ui/src/components/settings/constants.ts b/webview-ui/src/components/settings/constants.ts index 7d7020ddc8d0..a6631dfd66f4 100644 --- a/webview-ui/src/components/settings/constants.ts +++ b/webview-ui/src/components/settings/constants.ts @@ -14,7 +14,6 @@ import { vertexModels, xaiModels, groqModels, - chutesModels, sambaNovaModels, doubaoModels, internationalZAiModels, @@ -38,7 +37,6 @@ export const MODELS_BY_PROVIDER: Partial void + routerModels?: RouterModels + organizationAllowList: OrganizationAllowList + modelValidationError?: string } -export const Chutes = ({ apiConfiguration, setApiConfigurationField }: ChutesProps) => { +export const Chutes = ({ + apiConfiguration, + setApiConfigurationField, + routerModels, + organizationAllowList, + modelValidationError, +}: ChutesProps) => { const { t } = useAppTranslation() const handleInputChange = useCallback( @@ -45,6 +58,18 @@ export const Chutes = ({ apiConfiguration, setApiConfigurationField }: ChutesPro {t("settings:providers.getChutesApiKey")} )} + + ) } diff --git a/webview-ui/src/components/ui/hooks/useSelectedModel.ts b/webview-ui/src/components/ui/hooks/useSelectedModel.ts index 1bfb2ea332d4..296b262c3731 100644 --- a/webview-ui/src/components/ui/hooks/useSelectedModel.ts +++ b/webview-ui/src/components/ui/hooks/useSelectedModel.ts @@ -27,7 +27,6 @@ import { xaiModels, groqModels, groqDefaultModelId, - chutesModels, chutesDefaultModelId, vscodeLlmModels, vscodeLlmDefaultModelId, @@ -203,7 +202,7 @@ function getSelectedModel({ } case "chutes": { const id = apiConfiguration.apiModelId ?? chutesDefaultModelId - const info = chutesModels[id as keyof typeof chutesModels] + const info = routerModels.chutes[id] return { id, info } } case "bedrock": { diff --git a/webview-ui/src/utils/__tests__/validate.test.ts b/webview-ui/src/utils/__tests__/validate.test.ts index c2451dcd6f32..0bd7a15962b5 100644 --- a/webview-ui/src/utils/__tests__/validate.test.ts +++ b/webview-ui/src/utils/__tests__/validate.test.ts @@ -44,6 +44,7 @@ describe("Model Validation Functions", () => { "vercel-ai-gateway": {}, huggingface: {}, roo: {}, + chutes: {}, } const allowAllOrganization: OrganizationAllowList = {