diff --git a/.changeset/zenmux-context-window-fix.md b/.changeset/zenmux-context-window-fix.md
new file mode 100644
index 00000000000..49538b9430a
--- /dev/null
+++ b/.changeset/zenmux-context-window-fix.md
@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Fixed ZenMux context window detection to prevent erroneous context-condensing loops.
diff --git a/.changeset/zenmux-native-tools-reliability.md b/.changeset/zenmux-native-tools-reliability.md
new file mode 100644
index 00000000000..1fd6501810d
--- /dev/null
+++ b/.changeset/zenmux-native-tools-reliability.md
@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Fixed ZenMux tool-calling reliability to avoid repeated "tool not used" loops and preserve transformed request messages.
diff --git a/packages/types/src/providers/zenmux.ts b/packages/types/src/providers/zenmux.ts
index 1fd878018ea..4db672bc388 100644
--- a/packages/types/src/providers/zenmux.ts
+++ b/packages/types/src/providers/zenmux.ts
@@ -9,6 +9,10 @@ export const zenmuxDefaultModelInfo: ModelInfo = {
 	contextWindow: 200_000,
 	supportsImages: true,
 	supportsPromptCache: true,
+	// kilocode_change start
+	supportsNativeTools: true,
+	defaultToolProtocol: "native",
+	// kilocode_change end
 	inputPrice: 15.0,
 	outputPrice: 75.0,
 	cacheWritesPrice: 18.75,
diff --git a/src/api/providers/__tests__/zenmux-native-tools.spec.ts b/src/api/providers/__tests__/zenmux-native-tools.spec.ts
new file mode 100644
index 00000000000..d67651860e7
--- /dev/null
+++ b/src/api/providers/__tests__/zenmux-native-tools.spec.ts
@@ -0,0 +1,175 @@
+// kilocode_change - new file
+import OpenAI from "openai"
+
+import type { ApiHandlerCreateMessageMetadata } from "../../index"
+import type { ApiHandlerOptions } from "../../../shared/api"
+import { ZenMuxHandler } from "../zenmux"
+
+vi.mock("../fetchers/modelCache", () => ({
+	getModels: vi.fn().mockResolvedValue({}),
+}))
+
+function createMockStream() {
+	return {
+		async *[Symbol.asyncIterator]() {
+			yield {
+				choices: [{ delta: { content: "ok" }, finish_reason: "stop" }],
+				usage: { prompt_tokens: 1, completion_tokens: 1, cost: 0 },
+			}
+		},
+	}
+}
+
+async function consume(generator: AsyncGenerator) {
+	for await (const _chunk of generator) {
+		// Consume all chunks
+	}
+}
+
+describe("ZenMuxHandler native tools and message pipeline", () => {
+	const baseOptions: ApiHandlerOptions = {
+		zenmuxApiKey: "test-key",
+		zenmuxModelId: "z-ai/glm-5",
+		zenmuxBaseUrl: "https://test.zenmux.ai/api/v1",
+	}
+
+	it("merges native tool defaults when model cache entry lacks native metadata", () => {
+		const handler = new ZenMuxHandler(baseOptions)
+		;(handler as unknown as { models: Record<string, unknown> }).models = {
+			"z-ai/glm-5": {
+				maxTokens: 8192,
+				contextWindow: 128000,
+				supportsImages: false,
+				supportsPromptCache: false,
+				inputPrice: 0,
+				outputPrice: 0,
+				description: "GLM 5",
+			},
+		}
+
+		const model = handler.getModel()
+		expect(model.info.supportsNativeTools).toBe(true)
+		expect(model.info.defaultToolProtocol).toBe("native")
+	})
+
+	it("passes tools and tool choice to stream creation when task protocol is native", async () => {
+		const handler = new ZenMuxHandler(baseOptions)
+
+		vi.spyOn(handler, "fetchModel").mockResolvedValue({
+			id: "z-ai/glm-5",
+			info: {
+				maxTokens: 8192,
+				contextWindow: 128000,
+				supportsNativeTools: true,
+				supportsImages: false,
+				supportsPromptCache: false,
+				inputPrice: 0,
+				outputPrice: 0,
+				description: "GLM 5",
+			},
+		} as any)
+
+		const streamSpy = vi.spyOn(handler, "createZenMuxStream").mockResolvedValue(createMockStream() as any)
+
+		const tools: OpenAI.Chat.ChatCompletionTool[] = [
+			{
+				type: "function",
+				function: {
+					name: "attempt_completion",
+					description: "Complete the task",
+					parameters: { type: "object", properties: {} },
+				},
+			},
+		]
+		const metadata: ApiHandlerCreateMessageMetadata = {
+			taskId: "task-native",
+			toolProtocol: "native",
+			tools,
+			tool_choice: "auto",
+			parallelToolCalls: true,
+		}
+
+		await consume(handler.createMessage("system", [{ role: "user", content: "hi" }], metadata))
+
+		expect(streamSpy).toHaveBeenCalledTimes(1)
+		expect(streamSpy.mock.calls[0][6]).toEqual(tools)
+		expect(streamSpy.mock.calls[0][7]).toBe("auto")
+		expect(streamSpy.mock.calls[0][8]).toBe(true)
+	})
+
+	it("omits tools when task protocol is xml even if tools are provided", async () => {
+		const handler = new ZenMuxHandler(baseOptions)
+
+		vi.spyOn(handler, "fetchModel").mockResolvedValue({
+			id: "z-ai/glm-5",
+			info: {
+				maxTokens: 8192,
+				contextWindow: 128000,
+				supportsNativeTools: true,
+				supportsImages: false,
+				supportsPromptCache: false,
+				inputPrice: 0,
+				outputPrice: 0,
+				description: "GLM 5",
+			},
+		} as any)
+
+		const streamSpy = vi.spyOn(handler, "createZenMuxStream").mockResolvedValue(createMockStream() as any)
+
+		const tools: OpenAI.Chat.ChatCompletionTool[] = [
+			{
+				type: "function",
+				function: {
+					name: "ask_followup_question",
+					description: "Ask a follow-up question",
+					parameters: { type: "object", properties: {} },
+				},
+			},
+		]
+
+		await consume(
+			handler.createMessage("system", [{ role: "user", content: "hi" }], {
+				taskId: "task-xml",
+				toolProtocol: "xml",
+				tools,
+				tool_choice: "auto",
+				parallelToolCalls: true,
+			}),
+		)
+
+		expect(streamSpy).toHaveBeenCalledTimes(1)
+		expect(streamSpy.mock.calls[0][6]).toBeUndefined()
+		expect(streamSpy.mock.calls[0][7]).toBeUndefined()
+		expect(streamSpy.mock.calls[0][8]).toBe(false)
+	})
+
+	it("passes transformed DeepSeek R1 messages into stream creation", async () => {
+		const handler = new ZenMuxHandler({
+			...baseOptions,
+			zenmuxModelId: "deepseek/deepseek-r1",
+		})
+
+		vi.spyOn(handler, "fetchModel").mockResolvedValue({
+			id: "deepseek/deepseek-r1",
+			info: {
+				maxTokens: 8192,
+				contextWindow: 128000,
+				supportsNativeTools: true,
+				supportsImages: false,
+				supportsPromptCache: false,
+				inputPrice: 0,
+				outputPrice: 0,
+				description: "DeepSeek R1",
+			},
+		} as any)
+
+		const streamSpy = vi.spyOn(handler, "createZenMuxStream").mockResolvedValue(createMockStream() as any)
+
+		await consume(handler.createMessage("system prompt", [{ role: "user", content: "hi" }], { taskId: "task-r1" }))
+
+		expect(streamSpy).toHaveBeenCalledTimes(1)
+		const sentMessages = streamSpy.mock.calls[0][1] as OpenAI.Chat.ChatCompletionMessageParam[]
+		expect(sentMessages.some((message: any) => message.role === "system")).toBe(false)
+		expect((sentMessages[0] as any).role).toBe("user")
+	})
+})
diff --git a/src/api/providers/fetchers/__tests__/modelCache.spec.ts b/src/api/providers/fetchers/__tests__/modelCache.spec.ts
index c03011b5097..aced5a2a7bc 100644
--- a/src/api/providers/fetchers/__tests__/modelCache.spec.ts
+++ b/src/api/providers/fetchers/__tests__/modelCache.spec.ts
@@ -333,6 +333,49 @@ describe("getModelsFromCache disk fallback", () => {
 
 		consoleErrorSpy.mockRestore()
 	})
+
+	// kilocode_change start
+	it("rejects stale ZenMux cache entries with invalid contextWindow", () => {
+		const invalidZenmuxModels = {
+			"anthropic/claude-opus-4": {
+				maxTokens: 0,
+				contextWindow: 0,
+				supportsPromptCache: false,
+			},
+		}
+
+		mockCache.get.mockReturnValue(invalidZenmuxModels)
+
+		const consoleWarnSpy = vi.spyOn(console, "warn").mockImplementation(() => {})
+
+		const result = getModelsFromCache("zenmux")
+
+		expect(result).toBeUndefined()
+		expect(consoleWarnSpy).toHaveBeenCalledWith(
+			"[MODEL_CACHE] Ignoring stale ZenMux model cache with invalid contextWindow values",
+		)
+		expect(fsSync.existsSync).not.toHaveBeenCalled()
+
+		consoleWarnSpy.mockRestore()
+	})
+
+	it("accepts valid ZenMux cache entries", () => {
+		const validZenmuxModels = {
+			"anthropic/claude-opus-4": {
+				maxTokens: 0,
+				contextWindow: 200000,
+				supportsPromptCache: false,
+			},
+		}
+
+		mockCache.get.mockReturnValue(validZenmuxModels)
+
+		const result = getModelsFromCache("zenmux")
+
+		expect(result).toEqual(validZenmuxModels)
+		expect(fsSync.existsSync).not.toHaveBeenCalled()
+	})
+	// kilocode_change end
 })
 
 describe("empty cache protection", () => {
diff --git a/src/api/providers/fetchers/__tests__/zenmux.spec.ts b/src/api/providers/fetchers/__tests__/zenmux.spec.ts
new file mode 100644
index 00000000000..ad68df52d7e
--- /dev/null
+++ b/src/api/providers/fetchers/__tests__/zenmux.spec.ts
@@ -0,0 +1,70 @@
+// kilocode_change - new file
+import { zenmuxDefaultModelInfo } from "@roo-code/types"
+import { getZenmuxModels } from "../zenmux"
+
+describe("getZenmuxModels", () => {
+	afterEach(() => {
+		vi.unstubAllGlobals()
+		vi.restoreAllMocks()
+	})
+
+	it("maps context_length from ZenMux model payload", async () => {
+		const fetchMock = vi.fn().mockResolvedValue({
+			json: vi.fn().mockResolvedValue({
+				object: "list",
+				data: [
+					{
+						id: "anthropic/claude-opus-4",
+						object: "model",
+						created: 1767146192,
+						owned_by: "anthropic",
+						display_name: "Claude Opus 4",
+						context_length: 200000,
+						input_modalities: ["text", "image"],
+					},
+				],
+			}),
+		})
+		vi.stubGlobal("fetch", fetchMock)
+
+		const models = await getZenmuxModels()
+
+		expect(models["anthropic/claude-opus-4"]).toEqual({
+			maxTokens: 0,
+			contextWindow: 200000,
+			supportsImages: true,
+			supportsPromptCache: false,
+			supportsNativeTools: true,
+			defaultToolProtocol: "native",
+			inputPrice: 0,
+			outputPrice: 0,
+			description: "anthropic model",
+			displayName: "Claude Opus 4",
+		})
+		expect(models["anthropic/claude-opus-4"].contextWindow).toBeGreaterThan(0)
+	})
+
+	it("falls back to default context window when optional metadata is missing", async () => {
+		const fetchMock = vi.fn().mockResolvedValue({
+			json: vi.fn().mockResolvedValue({
+				object: "list",
+				data: [
+					{
+						id: "openai/gpt-5",
+						object: "model",
+						created: 1767146192,
+						owned_by: "openai",
+					},
+				],
+			}),
+		})
+		vi.stubGlobal("fetch", fetchMock)
+
+		const models = await getZenmuxModels()
+
+		expect(models["openai/gpt-5"].contextWindow).toBe(zenmuxDefaultModelInfo.contextWindow)
+		expect(models["openai/gpt-5"].displayName).toBe("openai/gpt-5")
+		expect(models["openai/gpt-5"].supportsNativeTools).toBe(true)
+		expect(models["openai/gpt-5"].defaultToolProtocol).toBe("native")
+	})
+})
diff --git a/src/api/providers/fetchers/modelCache.ts b/src/api/providers/fetchers/modelCache.ts
index 4e436a810c6..eaf86708fa5 100644
--- a/src/api/providers/fetchers/modelCache.ts
+++ b/src/api/providers/fetchers/modelCache.ts
@@ -394,6 +394,12 @@ export function getModelsFromCache(provider: ProviderName): ModelRecord | undefined {
 	// Check memory cache first (fast)
 	const memoryModels = memoryCache.get(provider)
 	if (memoryModels) {
+		// kilocode_change start
+		if (provider === "zenmux" && hasInvalidZenmuxContextWindow(memoryModels)) {
+			console.warn("[MODEL_CACHE] Ignoring stale ZenMux model cache with invalid contextWindow values")
+			return undefined
+		}
+		// kilocode_change end
 		return memoryModels
 	}
 
@@ -429,6 +435,13 @@ export function getModelsFromCache(provider: ProviderName): ModelRecord | undefined {
 		)
 		return undefined
 	}
+	// kilocode_change start
+	// Self-heal stale ZenMux cache entries from v5.7.0 where contextWindow was persisted as 0.
+	if (provider === "zenmux" && hasInvalidZenmuxContextWindow(validation.data)) {
+		console.warn("[MODEL_CACHE] Ignoring stale ZenMux model cache with invalid contextWindow values")
+		return undefined
+	}
+	// kilocode_change end
 
 	// Populate memory cache for future fast access
 	memoryCache.set(provider, validation.data)
@@ -459,3 +472,9 @@ function getCacheDirectoryPathSync(): string | undefined {
 		return undefined
 	}
 }
+
+// kilocode_change start
+function hasInvalidZenmuxContextWindow(models: ModelRecord): boolean {
+	return Object.values(models).some((model) => (model.contextWindow ?? 0) <= 0)
+}
+// kilocode_change end
diff --git a/src/api/providers/fetchers/zenmux.ts b/src/api/providers/fetchers/zenmux.ts
index 4b3e3197ecb..9652a7a1377 100644
--- a/src/api/providers/fetchers/zenmux.ts
+++ b/src/api/providers/fetchers/zenmux.ts
@@ -1,9 +1,8 @@
 import { z } from "zod"
 
-import { type ModelInfo } from "@roo-code/types"
+import { type ModelInfo, zenmuxDefaultModelInfo } from "@roo-code/types"
 
 import type { ApiHandlerOptions } from "../../../shared/api"
 import { DEFAULT_HEADERS } from "../constants"
-import { parseApiPrice } from "../../../shared/cost"
 
 /**
  * ZenMuxModel
  */
@@ -13,6 +12,9 @@ const zenMuxModelSchema = z.object({
 	object: z.string(),
 	created: z.number(),
 	owned_by: z.string(),
+	display_name: z.string().optional(),
+	context_length: z.number().optional(),
+	input_modalities: z.array(z.string()).optional(),
 })
 
 export type ZenMuxModel = z.infer<typeof zenMuxModelSchema>
@@ -32,7 +34,7 @@ export async function getZenmuxModels(
 	options?: ApiHandlerOptions & { headers?: Record<string, string> },
 ): Promise<Record<string, ModelInfo>> {
 	const models: Record<string, ModelInfo> = {}
-	const baseURL = "https://zenmux.ai/api/v1"
+	const baseURL = options?.openRouterBaseUrl || "https://zenmux.ai/api/v1"
 	try {
 		const response = await fetch(`${baseURL}/models`, {
 			headers: { ...DEFAULT_HEADERS, ...(options?.headers ?? {}) },
@@ -47,16 +49,23 @@ export async function getZenmuxModels(
 		const data = result.data.data
 
 		for (const model of data) {
-			const { id, owned_by } = model
+			const { id, owned_by, display_name, context_length, input_modalities } = model
+			const contextWindow = context_length && context_length > 0 ? context_length : zenmuxDefaultModelInfo.contextWindow
 
 			const modelInfo: ModelInfo = {
+				// Keep max tokens conservative and let centralized max-token logic decide runtime reservation.
 				maxTokens: 0,
-				contextWindow: 0,
+				contextWindow,
+				supportsImages: input_modalities?.includes("image") ?? false,
 				supportsPromptCache: false,
+				// kilocode_change start
+				supportsNativeTools: true,
+				defaultToolProtocol: "native",
+				// kilocode_change end
 				inputPrice: 0,
 				outputPrice: 0,
 				description: `${owned_by || "ZenMux"} model`,
-				displayName: id,
+				displayName: display_name || id,
 			}
 
 			models[id] = modelInfo
diff --git a/src/api/providers/zenmux.ts b/src/api/providers/zenmux.ts
index 11f92aae420..84305544433 100644
--- a/src/api/providers/zenmux.ts
+++ b/src/api/providers/zenmux.ts
@@ -2,7 +2,7 @@ import OpenAI from "openai"
 import type Anthropic from "@anthropic-ai/sdk"
 
 import type { ModelInfo } from "@roo-code/types"
-import { zenmuxDefaultModelId, zenmuxDefaultModelInfo } from "@roo-code/types"
+import { NATIVE_TOOL_DEFAULTS, TOOL_PROTOCOL, zenmuxDefaultModelId, zenmuxDefaultModelInfo } from "@roo-code/types"
 import { ApiProviderError } from "@roo-code/types"
 import { TelemetryService } from "@roo-code/telemetry"
@@ -24,7 +24,6 @@ import { ChatCompletionTool } from "openai/resources"
 import { convertToOpenAiMessages } from "../transform/openai-format"
 import { convertToR1Format } from "../transform/r1-format"
 import { resolveToolProtocol } from "../../utils/resolveToolProtocol"
-import { TOOL_PROTOCOL } from "@roo-code/types"
 import { ApiStreamChunk } from "../transform/stream"
 import { NativeToolCallParser } from "../../core/assistant-message/NativeToolCallParser"
 import { KiloCodeChunkSchema } from "./kilocode/chunk-schema"
@@ -117,21 +116,16 @@ export class ZenMuxHandler extends BaseProvider implements SingleCompletionHandler {
 	}
 	async createZenMuxStream(
 		client: OpenAI,
-		systemPrompt: string,
-		messages: Anthropic.Messages.MessageParam[],
+		openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[],
 		model: { id: string; info: ModelInfo },
 		_reasoningEffort?: string,
 		thinkingBudgetTokens?: number,
 		zenMuxProviderSorting?: string,
 		tools?: Array<ChatCompletionTool>,
+		toolChoice?: OpenAI.Chat.ChatCompletionCreateParams["tool_choice"],
+		parallelToolCalls: boolean = false,
 		_geminiThinkingLevel?: string,
 	) {
-		// Convert Anthropic messages to OpenAI format
-		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-			{ role: "system", content: systemPrompt },
-			...convertToOpenAiMessages(messages),
-		]
-
 		// Build reasoning config if thinking budget is set
 		let reasoning: { max_tokens: number } | undefined
 		if (thinkingBudgetTokens && thinkingBudgetTokens > 0) {
@@ -155,16 +149,20 @@
 					},
 				}
 				: {}),
-			...this.getOpenAIToolParams(tools),
+			...this.getOpenAIToolParams(tools, toolChoice, parallelToolCalls),
 		})
 
 		return stream
 	}
-	getOpenAIToolParams(tools?: ChatCompletionTool[], enableParallelToolCalls: boolean = false) {
+	getOpenAIToolParams(
+		tools?: ChatCompletionTool[],
+		toolChoice?: OpenAI.Chat.ChatCompletionCreateParams["tool_choice"],
+		enableParallelToolCalls: boolean = false,
+	) {
 		return tools?.length
 			? {
 					tools,
-					tool_choice: tools ? "auto" : undefined,
+					tool_choice: toolChoice ?? "auto",
 					parallel_tool_calls: enableParallelToolCalls ? true : false,
 				}
 			: {
@@ -219,7 +217,9 @@
 		}
 
 		// Process reasoning_details when switching models to Gemini for native tool call compatibility
-		const toolProtocol = resolveToolProtocol(this.options, model.info)
+		// kilocode_change start
+		const toolProtocol = resolveToolProtocol(this.options, model.info, metadata?.toolProtocol)
+		// kilocode_change end
 		const isNativeProtocol = toolProtocol === TOOL_PROTOCOL.NATIVE
 		const isGemini = modelId.startsWith("google/gemini")
@@ -264,17 +264,24 @@
 			}
 		}
 
+		// kilocode_change start
+		const tools = isNativeProtocol ? metadata?.tools : undefined
+		const toolChoice = isNativeProtocol ? metadata?.tool_choice : undefined
+		const parallelToolCalls = isNativeProtocol ? (metadata?.parallelToolCalls ?? false) : false
+		// kilocode_change end
+
 		let stream
 		try {
 			stream = await this.createZenMuxStream(
 				this.client,
-				systemPrompt,
-				messages,
+				openAiMessages,
 				model,
 				this.options.reasoningEffort,
 				this.options.modelMaxThinkingTokens,
 				this.options.zenmuxProviderSort,
-				metadata?.tools,
+				tools,
+				toolChoice,
+				parallelToolCalls,
 			)
 		} catch (error) {
 			const errorMessage = error instanceof Error ? error.message : String(error)
@@ -447,7 +454,9 @@
 	override getModel() {
 		const id = this.options.zenmuxModelId ?? zenmuxDefaultModelId
-		let info = this.models[id] ?? zenmuxDefaultModelInfo
+		// kilocode_change start
+		let info = { ...NATIVE_TOOL_DEFAULTS, ...(this.models[id] ?? zenmuxDefaultModelInfo) }
+		// kilocode_change end
 
 		const isDeepSeekR1 = id.startsWith("deepseek/deepseek-r1") || id === "perplexity/sonar-reasoning"