diff --git a/packages/evals/src/cli/messageLogDeduper.test.ts b/packages/evals/src/cli/messageLogDeduper.test.ts new file mode 100644 index 00000000000..5556c0c8505 --- /dev/null +++ b/packages/evals/src/cli/messageLogDeduper.test.ts @@ -0,0 +1,35 @@ +import { MessageLogDeduper } from "./messageLogDeduper.js" + +describe("MessageLogDeduper", () => { + it("dedupes identical messages for same action+ts", () => { + const d = new MessageLogDeduper() + const msg = { ts: 123, type: "say", say: "reasoning", text: "hello", partial: false } + + expect(d.shouldLog("updated", msg)).toBe(true) + expect(d.shouldLog("updated", msg)).toBe(false) + }) + + it("logs again if payload changes for same action+ts", () => { + const d = new MessageLogDeduper() + expect(d.shouldLog("updated", { ts: 123, text: "a" })).toBe(true) + expect(d.shouldLog("updated", { ts: 123, text: "b" })).toBe(true) + }) + + it("does not dedupe across different actions", () => { + const d = new MessageLogDeduper() + const msg = { ts: 123, text: "same" } + expect(d.shouldLog("created", msg)).toBe(true) + expect(d.shouldLog("updated", msg)).toBe(true) + }) + + it("evicts oldest entries", () => { + const d = new MessageLogDeduper(2) + + expect(d.shouldLog("updated", { ts: 1, text: "a" })).toBe(true) + expect(d.shouldLog("updated", { ts: 2, text: "b" })).toBe(true) + // causes eviction of ts:1 + expect(d.shouldLog("updated", { ts: 3, text: "c" })).toBe(true) + // ts:1 was evicted so it should log again + expect(d.shouldLog("updated", { ts: 1, text: "a" })).toBe(true) + }) +}) diff --git a/packages/evals/src/cli/messageLogDeduper.ts b/packages/evals/src/cli/messageLogDeduper.ts new file mode 100644 index 00000000000..ed0c7714b4f --- /dev/null +++ b/packages/evals/src/cli/messageLogDeduper.ts @@ -0,0 +1,50 @@ +export class MessageLogDeduper { + private readonly lastLoggedByKey = new Map() + + constructor(private readonly maxEntries = 10_000) {} + + /** + * Returns true if this message should be logged. 
+ * Dedupe key: `${action}:${message.ts}`. + * Dedupe rule: skip if payload is identical to the last logged payload for that key. + */ + public shouldLog(action: string | undefined, message: unknown): boolean { + if (!action || !message || typeof message !== "object") { + return true + } + + const ts = (message as { ts?: unknown }).ts + if (typeof ts !== "number") { + return true + } + + let serialized: string + try { + serialized = JSON.stringify(message) + } catch { + // If serialization fails, prefer logging. + return true + } + + const key = `${action}:${ts}` + const prev = this.lastLoggedByKey.get(key) + if (prev === serialized) { + return false + } + + // Refresh insertion order so eviction removes true oldest. + if (this.lastLoggedByKey.has(key)) { + this.lastLoggedByKey.delete(key) + } + this.lastLoggedByKey.set(key, serialized) + + if (this.lastLoggedByKey.size > this.maxEntries) { + const oldestKey = this.lastLoggedByKey.keys().next().value as string | undefined + if (oldestKey) { + this.lastLoggedByKey.delete(oldestKey) + } + } + + return true + } +} diff --git a/packages/evals/src/cli/runTask.ts b/packages/evals/src/cli/runTask.ts index d7f37e72a1f..a6ae6c03059 100644 --- a/packages/evals/src/cli/runTask.ts +++ b/packages/evals/src/cli/runTask.ts @@ -32,6 +32,7 @@ import { EVALS_REPO_PATH } from "../exercises/index.js" import { Logger, getTag, isDockerContainer } from "./utils.js" import { redisClient, getPubSubKey, registerRunner, deregisterRunner } from "./redis.js" import { runUnitTest } from "./runUnitTest.js" +import { MessageLogDeduper } from "./messageLogDeduper.js" class SubprocessTimeoutError extends Error { constructor(timeout: number) { @@ -305,6 +306,7 @@ export const runTask = async ({ run, task, publish, logger, jobToken }: RunTaskO ] let isApiUnstable = false + const messageLogDeduper = new MessageLogDeduper() client.on(IpcMessageType.TaskEvent, async (taskEvent) => { const { eventName, payload } = taskEvent @@ -330,6 +332,15 @@ export 
const runTask = async ({ run, task, publish, logger, jobToken }: RunTaskO (payload[0].message.say && loggableSays.includes(payload[0].message.say)) || payload[0].message.partial !== true) ) { + // Dedupe identical repeated message events (same message.ts + same payload) + if (eventName === RooCodeEventName.Message) { + const action = payload[0]?.action as string | undefined + const message = payload[0]?.message + if (!messageLogDeduper.shouldLog(action, message)) { + return + } + } + // Extract tool name for tool-related messages for clearer logging let logEventName: string = eventName if (eventName === RooCodeEventName.Message && payload[0]?.message?.ask === "tool") { diff --git a/packages/types/src/providers/baseten.ts b/packages/types/src/providers/baseten.ts index 0eff95b5160..eeb6b0d2d1f 100644 --- a/packages/types/src/providers/baseten.ts +++ b/packages/types/src/providers/baseten.ts @@ -33,6 +33,7 @@ export const basetenModels = { contextWindow: 163_840, supportsImages: false, supportsPromptCache: false, + supportsNativeTools: true, inputPrice: 2.55, outputPrice: 5.95, cacheWritesPrice: 0, @@ -44,6 +45,7 @@ export const basetenModels = { contextWindow: 163_840, supportsImages: false, supportsPromptCache: false, + supportsNativeTools: true, inputPrice: 2.55, outputPrice: 5.95, cacheWritesPrice: 0, @@ -55,6 +57,7 @@ export const basetenModels = { contextWindow: 163_840, supportsImages: false, supportsPromptCache: false, + supportsNativeTools: true, inputPrice: 0.77, outputPrice: 0.77, cacheWritesPrice: 0, @@ -66,6 +69,7 @@ export const basetenModels = { contextWindow: 163_840, supportsImages: false, supportsPromptCache: false, + supportsNativeTools: true, inputPrice: 0.5, outputPrice: 1.5, cacheWritesPrice: 0, @@ -86,11 +90,24 @@ export const basetenModels = { description: "DeepSeek's hybrid reasoning model with efficient long context scaling with GPT-5 level performance", }, + "openai/gpt-oss-120b": { + maxTokens: 16_384, + contextWindow: 128_072, + 
supportsImages: false, + supportsPromptCache: false, + supportsNativeTools: true, + inputPrice: 0.1, + outputPrice: 0.5, + cacheWritesPrice: 0, + cacheReadsPrice: 0, + description: "Extremely capable general-purpose LLM with strong, controllable reasoning capabilities", + }, "Qwen/Qwen3-235B-A22B-Instruct-2507": { maxTokens: 16_384, contextWindow: 262_144, supportsImages: false, supportsPromptCache: false, + supportsNativeTools: true, inputPrice: 0.22, outputPrice: 0.8, cacheWritesPrice: 0, @@ -102,24 +119,13 @@ export const basetenModels = { contextWindow: 262_144, supportsImages: false, supportsPromptCache: false, + supportsNativeTools: true, inputPrice: 0.38, outputPrice: 1.53, cacheWritesPrice: 0, cacheReadsPrice: 0, description: "Mixture-of-experts LLM with advanced coding and reasoning capabilities", }, - "openai/gpt-oss-120b": { - maxTokens: 16_384, - contextWindow: 128_072, - supportsImages: false, - supportsPromptCache: false, - supportsNativeTools: true, - inputPrice: 0.1, - outputPrice: 0.5, - cacheWritesPrice: 0, - cacheReadsPrice: 0, - description: "Extremely capable general-purpose LLM with strong, controllable reasoning capabilities", - }, "moonshotai/Kimi-K2-Instruct-0905": { maxTokens: 16_384, contextWindow: 262_000, diff --git a/packages/types/src/providers/bedrock.ts b/packages/types/src/providers/bedrock.ts index 8dec37108ad..da40e98f433 100644 --- a/packages/types/src/providers/bedrock.ts +++ b/packages/types/src/providers/bedrock.ts @@ -454,22 +454,6 @@ export const bedrockModels = { outputPrice: 0.6, description: "Amazon Titan Text Express", }, - "amazon.titan-text-embeddings-v1:0": { - maxTokens: 8192, - contextWindow: 8_000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.1, - description: "Amazon Titan Text Embeddings", - }, - "amazon.titan-text-embeddings-v2:0": { - maxTokens: 8192, - contextWindow: 8_000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.02, - description: "Amazon Titan Text 
Embeddings V2", - }, "moonshot.kimi-k2-thinking": { maxTokens: 32_000, contextWindow: 262_144, diff --git a/packages/types/src/providers/featherless.ts b/packages/types/src/providers/featherless.ts index 8d9f7ae4e65..63bcb989687 100644 --- a/packages/types/src/providers/featherless.ts +++ b/packages/types/src/providers/featherless.ts @@ -13,6 +13,7 @@ export const featherlessModels = { contextWindow: 32678, supportsImages: false, supportsPromptCache: false, + supportsNativeTools: true, inputPrice: 0, outputPrice: 0, description: "DeepSeek V3 0324 model.", @@ -22,6 +23,7 @@ export const featherlessModels = { contextWindow: 32678, supportsImages: false, supportsPromptCache: false, + supportsNativeTools: true, inputPrice: 0, outputPrice: 0, description: "DeepSeek R1 0528 model.", @@ -41,6 +43,7 @@ export const featherlessModels = { contextWindow: 32678, supportsImages: false, supportsPromptCache: false, + supportsNativeTools: true, inputPrice: 0, outputPrice: 0, description: "GPT-OSS 120B model.", @@ -57,4 +60,4 @@ export const featherlessModels = { }, } as const satisfies Record -export const featherlessDefaultModelId: FeatherlessModelId = "deepseek-ai/DeepSeek-R1-0528" +export const featherlessDefaultModelId: FeatherlessModelId = "moonshotai/Kimi-K2-Instruct" diff --git a/packages/types/src/providers/groq.ts b/packages/types/src/providers/groq.ts index a8d8ca7c98a..a22ad764eef 100644 --- a/packages/types/src/providers/groq.ts +++ b/packages/types/src/providers/groq.ts @@ -5,12 +5,7 @@ export type GroqModelId = | "llama-3.1-8b-instant" | "llama-3.3-70b-versatile" | "meta-llama/llama-4-scout-17b-16e-instruct" - | "meta-llama/llama-4-maverick-17b-128e-instruct" - | "mistral-saba-24b" - | "qwen-qwq-32b" | "qwen/qwen3-32b" - | "deepseek-r1-distill-llama-70b" - | "moonshotai/kimi-k2-instruct" | "moonshotai/kimi-k2-instruct-0905" | "openai/gpt-oss-120b" | "openai/gpt-oss-20b" @@ -52,33 +47,6 @@ export const groqModels = { outputPrice: 0.34, description: "Meta Llama 4 Scout 
17B Instruct model, 128K context.", }, - "meta-llama/llama-4-maverick-17b-128e-instruct": { - maxTokens: 8192, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.2, - outputPrice: 0.6, - description: "Meta Llama 4 Maverick 17B Instruct model, 128K context.", - }, - "mistral-saba-24b": { - maxTokens: 8192, - contextWindow: 32768, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.79, - outputPrice: 0.79, - description: "Mistral Saba 24B model, 32K context.", - }, - "qwen-qwq-32b": { - maxTokens: 8192, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.29, - outputPrice: 0.39, - description: "Alibaba Qwen QwQ 32B model, 128K context.", - }, "qwen/qwen3-32b": { maxTokens: 8192, contextWindow: 131072, @@ -90,25 +58,6 @@ export const groqModels = { outputPrice: 0.59, description: "Alibaba Qwen 3 32B model, 128K context.", }, - "deepseek-r1-distill-llama-70b": { - maxTokens: 8192, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.75, - outputPrice: 0.99, - description: "DeepSeek R1 Distill Llama 70B model, 128K context.", - }, - "moonshotai/kimi-k2-instruct": { - maxTokens: 16384, - contextWindow: 131072, - supportsImages: false, - supportsPromptCache: true, - inputPrice: 1.0, - outputPrice: 3.0, - cacheReadsPrice: 0.5, // 50% discount for cached input tokens - description: "Moonshot AI Kimi K2 Instruct 1T model, 128K context.", - }, "moonshotai/kimi-k2-instruct-0905": { maxTokens: 16384, contextWindow: 262144, diff --git a/packages/types/src/providers/minimax.ts b/packages/types/src/providers/minimax.ts index aac397bf2ce..7152946f7f1 100644 --- a/packages/types/src/providers/minimax.ts +++ b/packages/types/src/providers/minimax.ts @@ -15,6 +15,8 @@ export const minimaxModels = { supportsPromptCache: true, supportsNativeTools: true, defaultToolProtocol: "native", + includedTools: ["search_and_replace"], + 
excludedTools: ["apply_diff"], preserveReasoning: true, inputPrice: 0.3, outputPrice: 1.2, @@ -30,6 +32,8 @@ export const minimaxModels = { supportsPromptCache: true, supportsNativeTools: true, defaultToolProtocol: "native", + includedTools: ["search_and_replace"], + excludedTools: ["apply_diff"], preserveReasoning: true, inputPrice: 0.3, outputPrice: 1.2, @@ -38,6 +42,23 @@ export const minimaxModels = { description: "MiniMax M2 Stable (High Concurrency, Commercial Use), a model born for Agents and code, featuring Top-tier Coding Capabilities, Powerful Agentic Performance, and Ultimate Cost-Effectiveness & Speed.", }, + "MiniMax-M2.1": { + maxTokens: 16_384, + contextWindow: 192_000, + supportsImages: false, + supportsPromptCache: true, + supportsNativeTools: true, + defaultToolProtocol: "native", + includedTools: ["search_and_replace"], + excludedTools: ["apply_diff"], + preserveReasoning: true, + inputPrice: 0.3, + outputPrice: 1.2, + cacheWritesPrice: 0.375, + cacheReadsPrice: 0.03, + description: + "MiniMax M2.1 builds on M2 with improved overall performance for agentic coding tasks and significantly faster response times.", + }, } as const satisfies Record export const minimaxDefaultModelInfo: ModelInfo = minimaxModels[minimaxDefaultModelId] diff --git a/packages/types/src/providers/sambanova.ts b/packages/types/src/providers/sambanova.ts index 7c04475c60f..dc592d180ca 100644 --- a/packages/types/src/providers/sambanova.ts +++ b/packages/types/src/providers/sambanova.ts @@ -7,9 +7,7 @@ export type SambaNovaModelId = | "DeepSeek-R1" | "DeepSeek-V3-0324" | "DeepSeek-V3.1" - | "DeepSeek-R1-Distill-Llama-70B" | "Llama-4-Maverick-17B-128E-Instruct" - | "Llama-3.3-Swallow-70B-Instruct-v0.4" | "Qwen3-32B" | "gpt-oss-120b" @@ -72,15 +70,6 @@ export const sambaNovaModels = { outputPrice: 4.5, description: "DeepSeek V3.1 model with 32K context window.", }, - "DeepSeek-R1-Distill-Llama-70B": { - maxTokens: 8192, - contextWindow: 131072, - supportsImages: false, - 
supportsPromptCache: false, - inputPrice: 0.7, - outputPrice: 1.4, - description: "DeepSeek R1 distilled Llama 70B model with 128K context window.", - }, "Llama-4-Maverick-17B-128E-Instruct": { maxTokens: 8192, contextWindow: 131072, @@ -92,15 +81,6 @@ export const sambaNovaModels = { outputPrice: 1.8, description: "Meta Llama 4 Maverick 17B 128E Instruct model with 128K context window.", }, - "Llama-3.3-Swallow-70B-Instruct-v0.4": { - maxTokens: 8192, - contextWindow: 16384, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.6, - outputPrice: 1.2, - description: "Tokyotech Llama 3.3 Swallow 70B Instruct v0.4 model with 16K context window.", - }, "Qwen3-32B": { maxTokens: 8192, contextWindow: 8192, diff --git a/packages/types/src/providers/vercel-ai-gateway.ts b/packages/types/src/providers/vercel-ai-gateway.ts index 875b87bf8b5..40d4f1ca509 100644 --- a/packages/types/src/providers/vercel-ai-gateway.ts +++ b/packages/types/src/providers/vercel-ai-gateway.ts @@ -90,6 +90,7 @@ export const vercelAiGatewayDefaultModelInfo: ModelInfo = { contextWindow: 200000, supportsImages: true, supportsPromptCache: true, + supportsNativeTools: true, inputPrice: 3, outputPrice: 15, cacheWritesPrice: 3.75, diff --git a/packages/types/src/providers/vertex.ts b/packages/types/src/providers/vertex.ts index cd247efb41a..384b78de4db 100644 --- a/packages/types/src/providers/vertex.ts +++ b/packages/types/src/providers/vertex.ts @@ -477,6 +477,7 @@ export const vertexModels = { contextWindow: 131072, supportsImages: false, supportsPromptCache: false, + supportsNativeTools: true, inputPrice: 0.35, outputPrice: 1.15, description: "Meta Llama 4 Maverick 17B Instruct model, 128K context.", @@ -486,6 +487,7 @@ export const vertexModels = { contextWindow: 163_840, supportsImages: false, supportsPromptCache: false, + supportsNativeTools: true, inputPrice: 1.35, outputPrice: 5.4, description: "DeepSeek R1 (0528). 
Available in us-central1", @@ -495,6 +497,7 @@ export const vertexModels = { contextWindow: 163_840, supportsImages: false, supportsPromptCache: false, + supportsNativeTools: true, inputPrice: 0.6, outputPrice: 1.7, description: "DeepSeek V3.1. Available in us-west2", @@ -504,6 +507,7 @@ export const vertexModels = { contextWindow: 131_072, supportsImages: false, supportsPromptCache: false, + supportsNativeTools: true, inputPrice: 0.15, outputPrice: 0.6, description: "OpenAI gpt-oss 120B. Available in us-central1", @@ -513,6 +517,7 @@ export const vertexModels = { contextWindow: 131_072, supportsImages: false, supportsPromptCache: false, + supportsNativeTools: true, inputPrice: 0.075, outputPrice: 0.3, description: "OpenAI gpt-oss 20B. Available in us-central1", @@ -522,6 +527,7 @@ export const vertexModels = { contextWindow: 262_144, supportsImages: false, supportsPromptCache: false, + supportsNativeTools: true, inputPrice: 1.0, outputPrice: 4.0, description: "Qwen3 Coder 480B A35B Instruct. Available in us-south1", @@ -531,6 +537,7 @@ export const vertexModels = { contextWindow: 262_144, supportsImages: false, supportsPromptCache: false, + supportsNativeTools: true, inputPrice: 0.25, outputPrice: 1.0, description: "Qwen3 235B A22B Instruct. Available in us-south1", diff --git a/packages/types/src/providers/zai.ts b/packages/types/src/providers/zai.ts index 1dfface9f6c..93cf9bb23bc 100644 --- a/packages/types/src/providers/zai.ts +++ b/packages/types/src/providers/zai.ts @@ -12,7 +12,7 @@ export type InternationalZAiModelId = keyof typeof internationalZAiModels export const internationalZAiDefaultModelId: InternationalZAiModelId = "glm-4.6" export const internationalZAiModels = { "glm-4.5": { - maxTokens: 98_304, + maxTokens: 16_384, contextWindow: 131_072, supportsImages: false, supportsPromptCache: true, @@ -26,7 +26,7 @@ export const internationalZAiModels = { "GLM-4.5 is Zhipu's latest featured model. 
Its comprehensive capabilities in reasoning, coding, and agent reach the state-of-the-art (SOTA) level among open-source models, with a context length of up to 128k.", }, "glm-4.5-air": { - maxTokens: 98_304, + maxTokens: 16_384, contextWindow: 131_072, supportsImages: false, supportsPromptCache: true, @@ -40,7 +40,7 @@ export const internationalZAiModels = { "GLM-4.5-Air is the lightweight version of GLM-4.5. It balances performance and cost-effectiveness, and can flexibly switch to hybrid thinking models.", }, "glm-4.5-x": { - maxTokens: 98_304, + maxTokens: 16_384, contextWindow: 131_072, supportsImages: false, supportsPromptCache: true, @@ -54,7 +54,7 @@ export const internationalZAiModels = { "GLM-4.5-X is a high-performance variant optimized for strong reasoning with ultra-fast responses.", }, "glm-4.5-airx": { - maxTokens: 98_304, + maxTokens: 16_384, contextWindow: 131_072, supportsImages: false, supportsPromptCache: true, @@ -67,7 +67,7 @@ export const internationalZAiModels = { description: "GLM-4.5-AirX is a lightweight, ultra-fast variant delivering strong performance with lower cost.", }, "glm-4.5-flash": { - maxTokens: 98_304, + maxTokens: 16_384, contextWindow: 131_072, supportsImages: false, supportsPromptCache: true, @@ -94,7 +94,7 @@ export const internationalZAiModels = { "GLM-4.5V is Z.AI's multimodal visual reasoning model (image/video/text/file input), optimized for GUI tasks, grounding, and document/video understanding.", }, "glm-4.6": { - maxTokens: 98_304, + maxTokens: 16_384, contextWindow: 200_000, supportsImages: false, supportsPromptCache: true, @@ -107,8 +107,25 @@ export const internationalZAiModels = { description: "GLM-4.6 is Zhipu's newest model with an extended context window of up to 200k tokens, providing enhanced capabilities for processing longer documents and conversations.", }, + "glm-4.7": { + maxTokens: 16_384, + contextWindow: 200_000, + supportsImages: false, + supportsPromptCache: true, + supportsNativeTools: true, + 
defaultToolProtocol: "native", + supportsReasoningEffort: ["disable", "medium"], + reasoningEffort: "medium", + preserveReasoning: true, + inputPrice: 0.6, + outputPrice: 2.2, + cacheWritesPrice: 0, + cacheReadsPrice: 0.11, + description: + "GLM-4.7 is Zhipu's latest model with built-in thinking capabilities enabled by default. It provides enhanced reasoning for complex tasks while maintaining fast response times.", + }, "glm-4-32b-0414-128k": { - maxTokens: 98_304, + maxTokens: 16_384, contextWindow: 131_072, supportsImages: false, supportsPromptCache: false, @@ -126,7 +143,7 @@ export type MainlandZAiModelId = keyof typeof mainlandZAiModels export const mainlandZAiDefaultModelId: MainlandZAiModelId = "glm-4.6" export const mainlandZAiModels = { "glm-4.5": { - maxTokens: 98_304, + maxTokens: 16_384, contextWindow: 131_072, supportsImages: false, supportsPromptCache: true, @@ -140,7 +157,7 @@ export const mainlandZAiModels = { "GLM-4.5 is Zhipu's latest featured model. Its comprehensive capabilities in reasoning, coding, and agent reach the state-of-the-art (SOTA) level among open-source models, with a context length of up to 128k.", }, "glm-4.5-air": { - maxTokens: 98_304, + maxTokens: 16_384, contextWindow: 131_072, supportsImages: false, supportsPromptCache: true, @@ -154,7 +171,7 @@ export const mainlandZAiModels = { "GLM-4.5-Air is the lightweight version of GLM-4.5. 
It balances performance and cost-effectiveness, and can flexibly switch to hybrid thinking models.", }, "glm-4.5-x": { - maxTokens: 98_304, + maxTokens: 16_384, contextWindow: 131_072, supportsImages: false, supportsPromptCache: true, @@ -168,7 +185,7 @@ export const mainlandZAiModels = { "GLM-4.5-X is a high-performance variant optimized for strong reasoning with ultra-fast responses.", }, "glm-4.5-airx": { - maxTokens: 98_304, + maxTokens: 16_384, contextWindow: 131_072, supportsImages: false, supportsPromptCache: true, @@ -181,7 +198,7 @@ export const mainlandZAiModels = { description: "GLM-4.5-AirX is a lightweight, ultra-fast variant delivering strong performance with lower cost.", }, "glm-4.5-flash": { - maxTokens: 98_304, + maxTokens: 16_384, contextWindow: 131_072, supportsImages: false, supportsPromptCache: true, @@ -208,7 +225,7 @@ export const mainlandZAiModels = { "GLM-4.5V is Z.AI's multimodal visual reasoning model (image/video/text/file input), optimized for GUI tasks, grounding, and document/video understanding.", }, "glm-4.6": { - maxTokens: 98_304, + maxTokens: 16_384, contextWindow: 204_800, supportsImages: false, supportsPromptCache: true, @@ -221,6 +238,23 @@ export const mainlandZAiModels = { description: "GLM-4.6 is Zhipu's newest model with an extended context window of up to 200k tokens, providing enhanced capabilities for processing longer documents and conversations.", }, + "glm-4.7": { + maxTokens: 16_384, + contextWindow: 204_800, + supportsImages: false, + supportsPromptCache: true, + supportsNativeTools: true, + defaultToolProtocol: "native", + supportsReasoningEffort: ["disable", "medium"], + reasoningEffort: "medium", + preserveReasoning: true, + inputPrice: 0.29, + outputPrice: 1.14, + cacheWritesPrice: 0, + cacheReadsPrice: 0.057, + description: + "GLM-4.7 is Zhipu's latest model with built-in thinking capabilities enabled by default. 
It provides enhanced reasoning for complex tasks while maintaining fast response times.", + }, } as const satisfies Record export const ZAI_DEFAULT_TEMPERATURE = 0.6 diff --git a/src/api/providers/__tests__/anthropic-vertex.spec.ts b/src/api/providers/__tests__/anthropic-vertex.spec.ts index c76dc60f5f5..1a1e3c87c53 100644 --- a/src/api/providers/__tests__/anthropic-vertex.spec.ts +++ b/src/api/providers/__tests__/anthropic-vertex.spec.ts @@ -1203,7 +1203,8 @@ describe("VertexHandler", () => { ) }) - it("should not include tools when toolProtocol is xml", async () => { + it("should include tools even when toolProtocol is set to xml (user preference now ignored)", async () => { + // XML protocol deprecation: user preference is now ignored when model supports native tools handler = new AnthropicVertexHandler({ apiModelId: "claude-3-5-sonnet-v2@20241022", vertexProjectId: "test-project", @@ -1244,9 +1245,14 @@ describe("VertexHandler", () => { // Just consume } + // Native is forced when supportsNativeTools===true, so tools should still be included expect(mockCreate).toHaveBeenCalledWith( - expect.not.objectContaining({ - tools: expect.anything(), + expect.objectContaining({ + tools: expect.arrayContaining([ + expect.objectContaining({ + name: "get_weather", + }), + ]), }), { signal: undefined }, ) diff --git a/src/api/providers/__tests__/anthropic.spec.ts b/src/api/providers/__tests__/anthropic.spec.ts index d9d2fb302be..5832b963523 100644 --- a/src/api/providers/__tests__/anthropic.spec.ts +++ b/src/api/providers/__tests__/anthropic.spec.ts @@ -454,8 +454,8 @@ describe("AnthropicHandler", () => { ) }) - it("should not include tools when toolProtocol is xml", async () => { - // Create handler with xml tool protocol in options + it("should include tools even when toolProtocol is set to xml (user preference now ignored)", async () => { + // XML protocol deprecation: user preference is now ignored when model supports native tools const xmlHandler = new 
AnthropicHandler({ ...mockOptions, toolProtocol: "xml", @@ -471,9 +471,14 @@ describe("AnthropicHandler", () => { // Just consume } + // Native is forced when supportsNativeTools===true, so tools should still be included expect(mockCreate).toHaveBeenCalledWith( - expect.not.objectContaining({ - tools: expect.anything(), + expect.objectContaining({ + tools: expect.arrayContaining([ + expect.objectContaining({ + name: "get_weather", + }), + ]), }), expect.anything(), ) diff --git a/src/api/providers/__tests__/base-openai-compatible-provider.spec.ts b/src/api/providers/__tests__/base-openai-compatible-provider.spec.ts index 57ee649d6e5..7d0d2548fce 100644 --- a/src/api/providers/__tests__/base-openai-compatible-provider.spec.ts +++ b/src/api/providers/__tests__/base-openai-compatible-provider.spec.ts @@ -383,4 +383,166 @@ describe("BaseOpenAiCompatibleProvider", () => { expect(firstChunk.value).toMatchObject({ type: "usage", inputTokens: 100, outputTokens: 50 }) }) }) + + describe("Tool call handling", () => { + it("should yield tool_call_end events when finish_reason is tool_calls", async () => { + mockCreate.mockImplementationOnce(() => { + return { + [Symbol.asyncIterator]: () => ({ + next: vi + .fn() + .mockResolvedValueOnce({ + done: false, + value: { + choices: [ + { + delta: { + tool_calls: [ + { + index: 0, + id: "call_123", + function: { name: "test_tool", arguments: '{"arg":' }, + }, + ], + }, + }, + ], + }, + }) + .mockResolvedValueOnce({ + done: false, + value: { + choices: [ + { + delta: { + tool_calls: [ + { + index: 0, + function: { arguments: '"value"}' }, + }, + ], + }, + }, + ], + }, + }) + .mockResolvedValueOnce({ + done: false, + value: { + choices: [ + { + delta: {}, + finish_reason: "tool_calls", + }, + ], + }, + }) + .mockResolvedValueOnce({ done: true }), + }), + } + }) + + const stream = handler.createMessage("system prompt", []) + const chunks = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + // Should have 
tool_call_partial and tool_call_end + const partialChunks = chunks.filter((chunk) => chunk.type === "tool_call_partial") + const endChunks = chunks.filter((chunk) => chunk.type === "tool_call_end") + + expect(partialChunks).toHaveLength(2) + expect(endChunks).toHaveLength(1) + expect(endChunks[0]).toEqual({ type: "tool_call_end", id: "call_123" }) + }) + + it("should yield multiple tool_call_end events for parallel tool calls", async () => { + mockCreate.mockImplementationOnce(() => { + return { + [Symbol.asyncIterator]: () => ({ + next: vi + .fn() + .mockResolvedValueOnce({ + done: false, + value: { + choices: [ + { + delta: { + tool_calls: [ + { + index: 0, + id: "call_001", + function: { name: "tool_a", arguments: "{}" }, + }, + { + index: 1, + id: "call_002", + function: { name: "tool_b", arguments: "{}" }, + }, + ], + }, + }, + ], + }, + }) + .mockResolvedValueOnce({ + done: false, + value: { + choices: [ + { + delta: {}, + finish_reason: "tool_calls", + }, + ], + }, + }) + .mockResolvedValueOnce({ done: true }), + }), + } + }) + + const stream = handler.createMessage("system prompt", []) + const chunks = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + const endChunks = chunks.filter((chunk) => chunk.type === "tool_call_end") + expect(endChunks).toHaveLength(2) + expect(endChunks.map((c: any) => c.id).sort()).toEqual(["call_001", "call_002"]) + }) + + it("should not yield tool_call_end when finish_reason is not tool_calls", async () => { + mockCreate.mockImplementationOnce(() => { + return { + [Symbol.asyncIterator]: () => ({ + next: vi + .fn() + .mockResolvedValueOnce({ + done: false, + value: { + choices: [ + { + delta: { content: "Some text response" }, + finish_reason: "stop", + }, + ], + }, + }) + .mockResolvedValueOnce({ done: true }), + }), + } + }) + + const stream = handler.createMessage("system prompt", []) + const chunks = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + const endChunks = 
chunks.filter((chunk) => chunk.type === "tool_call_end") + expect(endChunks).toHaveLength(0) + }) + }) }) diff --git a/src/api/providers/__tests__/featherless.spec.ts b/src/api/providers/__tests__/featherless.spec.ts index 00518828851..936c10fcd09 100644 --- a/src/api/providers/__tests__/featherless.spec.ts +++ b/src/api/providers/__tests__/featherless.spec.ts @@ -3,12 +3,7 @@ import { Anthropic } from "@anthropic-ai/sdk" import OpenAI from "openai" -import { - type FeatherlessModelId, - featherlessDefaultModelId, - featherlessModels, - DEEP_SEEK_DEFAULT_TEMPERATURE, -} from "@roo-code/types" +import { type FeatherlessModelId, featherlessDefaultModelId, featherlessModels } from "@roo-code/types" import { FeatherlessHandler } from "../featherless" @@ -76,7 +71,7 @@ describe("FeatherlessHandler", () => { expect(OpenAI).toHaveBeenCalledWith(expect.objectContaining({ apiKey: featherlessApiKey })) }) - it("should handle DeepSeek R1 reasoning format", async () => { + it("should handle reasoning format from models that use tags", async () => { // Override the mock for this specific test mockCreate.mockImplementationOnce(async () => ({ [Symbol.asyncIterator]: async function* () { @@ -113,7 +108,7 @@ describe("FeatherlessHandler", () => { const systemPrompt = "You are a helpful assistant." 
const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }] vi.spyOn(handler, "getModel").mockReturnValue({ - id: "deepseek-ai/DeepSeek-R1-0528", + id: "some-reasoning-model", info: { maxTokens: 1024, temperature: 0.7 }, } as any) @@ -154,7 +149,7 @@ describe("FeatherlessHandler", () => { }) it("should return specified model when valid model is provided", () => { - const testModelId: FeatherlessModelId = "deepseek-ai/DeepSeek-R1-0528" + const testModelId: FeatherlessModelId = "moonshotai/Kimi-K2-Instruct" const handlerWithModel = new FeatherlessHandler({ apiModelId: testModelId, featherlessApiKey: "test-featherless-api-key", @@ -225,8 +220,8 @@ describe("FeatherlessHandler", () => { expect(firstChunk.value).toMatchObject({ type: "usage", inputTokens: 10, outputTokens: 20 }) }) - it("createMessage should pass correct parameters to Featherless client for DeepSeek R1", async () => { - const modelId: FeatherlessModelId = "deepseek-ai/DeepSeek-R1-0528" + it("createMessage should pass correct parameters to Featherless client", async () => { + const modelId: FeatherlessModelId = "moonshotai/Kimi-K2-Instruct" // Clear previous mocks and set up new implementation mockCreate.mockClear() @@ -247,27 +242,9 @@ describe("FeatherlessHandler", () => { const messageGenerator = handlerWithModel.createMessage(systemPrompt, messages) await messageGenerator.next() - expect(mockCreate).toHaveBeenCalledWith( - expect.objectContaining({ - model: modelId, - messages: [ - { - role: "user", - content: `${systemPrompt}\n${messages[0].content}`, - }, - ], - }), - ) - }) - - it("should apply DeepSeek default temperature for R1 models", () => { - const testModelId: FeatherlessModelId = "deepseek-ai/DeepSeek-R1-0528" - const handlerWithModel = new FeatherlessHandler({ - apiModelId: testModelId, - featherlessApiKey: "test-featherless-api-key", - }) - const model = handlerWithModel.getModel() - expect(model.info.temperature).toBe(DEEP_SEEK_DEFAULT_TEMPERATURE) + 
expect(mockCreate).toHaveBeenCalled() + const callArgs = mockCreate.mock.calls[0][0] + expect(callArgs.model).toBe(modelId) }) it("should use default temperature for non-DeepSeek models", () => { diff --git a/src/api/providers/__tests__/zai.spec.ts b/src/api/providers/__tests__/zai.spec.ts index 707abee09f6..34323b108d3 100644 --- a/src/api/providers/__tests__/zai.spec.ts +++ b/src/api/providers/__tests__/zai.spec.ts @@ -82,6 +82,22 @@ describe("ZAiHandler", () => { expect(model.info.contextWindow).toBe(200_000) }) + it("should return GLM-4.7 international model with thinking support", () => { + const testModelId: InternationalZAiModelId = "glm-4.7" + const handlerWithModel = new ZAiHandler({ + apiModelId: testModelId, + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + expect(model.info).toEqual(internationalZAiModels[testModelId]) + expect(model.info.contextWindow).toBe(200_000) + expect(model.info.supportsReasoningEffort).toEqual(["disable", "medium"]) + expect(model.info.reasoningEffort).toBe("medium") + expect(model.info.preserveReasoning).toBe(true) + }) + it("should return GLM-4.5v international model with vision support", () => { const testModelId: InternationalZAiModelId = "glm-4.5v" const handlerWithModel = new ZAiHandler({ @@ -161,6 +177,22 @@ describe("ZAiHandler", () => { expect(model.info.maxTokens).toBe(16_384) expect(model.info.contextWindow).toBe(131_072) }) + + it("should return GLM-4.7 China model with thinking support", () => { + const testModelId: MainlandZAiModelId = "glm-4.7" + const handlerWithModel = new ZAiHandler({ + apiModelId: testModelId, + zaiApiKey: "test-zai-api-key", + zaiApiLine: "china_coding", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + expect(model.info).toEqual(mainlandZAiModels[testModelId]) + expect(model.info.contextWindow).toBe(204_800) + 
expect(model.info.supportsReasoningEffort).toEqual(["disable", "medium"]) + expect(model.info.reasoningEffort).toBe("medium") + expect(model.info.preserveReasoning).toBe(true) + }) }) describe("International API", () => { @@ -371,4 +403,123 @@ describe("ZAiHandler", () => { ) }) }) + + describe("GLM-4.7 Thinking Mode", () => { + it("should enable thinking by default for GLM-4.7 (default reasoningEffort is medium)", async () => { + const handlerWithModel = new ZAiHandler({ + apiModelId: "glm-4.7", + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + // No reasoningEffort setting - should use model default (medium) + }) + + mockCreate.mockImplementationOnce(() => { + return { + [Symbol.asyncIterator]: () => ({ + async next() { + return { done: true } + }, + }), + } + }) + + const messageGenerator = handlerWithModel.createMessage("system prompt", []) + await messageGenerator.next() + + // For GLM-4.7 with default reasoning (medium), thinking should be enabled + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: "glm-4.7", + thinking: { type: "enabled" }, + }), + ) + }) + + it("should disable thinking for GLM-4.7 when reasoningEffort is set to disable", async () => { + const handlerWithModel = new ZAiHandler({ + apiModelId: "glm-4.7", + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + enableReasoningEffort: true, + reasoningEffort: "disable", + }) + + mockCreate.mockImplementationOnce(() => { + return { + [Symbol.asyncIterator]: () => ({ + async next() { + return { done: true } + }, + }), + } + }) + + const messageGenerator = handlerWithModel.createMessage("system prompt", []) + await messageGenerator.next() + + // For GLM-4.7 with reasoning disabled, thinking should be disabled + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: "glm-4.7", + thinking: { type: "disabled" }, + }), + ) + }) + + it("should enable thinking for GLM-4.7 when reasoningEffort is set to medium", 
async () => { + const handlerWithModel = new ZAiHandler({ + apiModelId: "glm-4.7", + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + enableReasoningEffort: true, + reasoningEffort: "medium", + }) + + mockCreate.mockImplementationOnce(() => { + return { + [Symbol.asyncIterator]: () => ({ + async next() { + return { done: true } + }, + }), + } + }) + + const messageGenerator = handlerWithModel.createMessage("system prompt", []) + await messageGenerator.next() + + // For GLM-4.7 with reasoning set to medium, thinking should be enabled + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: "glm-4.7", + thinking: { type: "enabled" }, + }), + ) + }) + + it("should NOT add thinking parameter for non-thinking models like GLM-4.6", async () => { + const handlerWithModel = new ZAiHandler({ + apiModelId: "glm-4.6", + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + }) + + mockCreate.mockImplementationOnce(() => { + return { + [Symbol.asyncIterator]: () => ({ + async next() { + return { done: true } + }, + }), + } + }) + + const messageGenerator = handlerWithModel.createMessage("system prompt", []) + await messageGenerator.next() + + // For GLM-4.6 (no thinking support), thinking parameter should not be present + const callArgs = mockCreate.mock.calls[0][0] + expect(callArgs.thinking).toBeUndefined() + }) + }) }) diff --git a/src/api/providers/base-openai-compatible-provider.ts b/src/api/providers/base-openai-compatible-provider.ts index b78fe5add78..d832508f635 100644 --- a/src/api/providers/base-openai-compatible-provider.ts +++ b/src/api/providers/base-openai-compatible-provider.ts @@ -129,6 +129,7 @@ export abstract class BaseOpenAiCompatibleProvider ) let lastUsage: OpenAI.CompletionUsage | undefined + const activeToolCallIds = new Set() for await (const chunk of stream) { // Check for provider-specific error responses (e.g., MiniMax base_resp) @@ -140,6 +141,7 @@ export abstract class 
BaseOpenAiCompatibleProvider } const delta = chunk.choices?.[0]?.delta + const finishReason = chunk.choices?.[0]?.finish_reason if (delta?.content) { for (const processedChunk of matcher.update(delta.content)) { @@ -162,6 +164,9 @@ export abstract class BaseOpenAiCompatibleProvider // Emit raw tool call chunks - NativeToolCallParser handles state management if (delta?.tool_calls) { for (const toolCall of delta.tool_calls) { + if (toolCall.id) { + activeToolCallIds.add(toolCall.id) + } yield { type: "tool_call_partial", index: toolCall.index, @@ -172,6 +177,15 @@ export abstract class BaseOpenAiCompatibleProvider } } + // Emit tool_call_end events when finish_reason is "tool_calls" + // This ensures tool calls are finalized even if the stream doesn't properly close + if (finishReason === "tool_calls" && activeToolCallIds.size > 0) { + for (const id of activeToolCallIds) { + yield { type: "tool_call_end", id } + } + activeToolCallIds.clear() + } + if (chunk.usage) { lastUsage = chunk.usage } diff --git a/src/api/providers/minimax.ts b/src/api/providers/minimax.ts index 12d7934546e..a7cea478ed0 100644 --- a/src/api/providers/minimax.ts +++ b/src/api/providers/minimax.ts @@ -9,6 +9,7 @@ import type { ApiHandlerOptions } from "../../shared/api" import { ApiStream } from "../transform/stream" import { getModelParams } from "../transform/model-params" +import { mergeEnvironmentDetailsForMiniMax } from "../transform/minimax-format" import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" @@ -87,15 +88,26 @@ export class MiniMaxHandler extends BaseProvider implements SingleCompletionHand // MiniMax M2 models support prompt caching const supportsPromptCache = info.supportsPromptCache ?? false + // Merge environment_details from messages that follow tool_result blocks + // into the tool_result content. 
This preserves reasoning continuity for + // thinking models by preventing user messages from interrupting the + // reasoning context after tool use (similar to r1-format's mergeToolResultText). + const processedMessages = mergeEnvironmentDetailsForMiniMax(messages) + + // Build the system blocks array + const systemBlocks: Anthropic.Messages.TextBlockParam[] = [ + supportsPromptCache + ? { text: systemPrompt, type: "text", cache_control: cacheControl } + : { text: systemPrompt, type: "text" }, + ] + // Prepare request parameters const requestParams: Anthropic.Messages.MessageCreateParams = { model: modelId, max_tokens: maxTokens ?? 16_384, temperature: temperature ?? 1.0, - system: supportsPromptCache - ? [{ text: systemPrompt, type: "text", cache_control: cacheControl }] - : [{ text: systemPrompt, type: "text" }], - messages: supportsPromptCache ? this.addCacheControl(messages, cacheControl) : messages, + system: systemBlocks, + messages: supportsPromptCache ? this.addCacheControl(processedMessages, cacheControl) : processedMessages, stream: true, } diff --git a/src/api/providers/roo.ts b/src/api/providers/roo.ts index 387a7cde667..1431705f053 100644 --- a/src/api/providers/roo.ts +++ b/src/api/providers/roo.ts @@ -180,7 +180,10 @@ export class RooHandler extends BaseOpenAiCompatibleProvider { if (deltaWithReasoning.reasoning_details && Array.isArray(deltaWithReasoning.reasoning_details)) { for (const detail of deltaWithReasoning.reasoning_details) { const index = detail.index ?? 0 - const key = `${detail.type}-${index}` + // Use id as key when available to merge chunks that share the same reasoning block id + // This ensures that reasoning.summary and reasoning.encrypted chunks with the same id + // are merged into a single object, matching the provider's expected format + const key = detail.id ?? 
`${detail.type}-${index}` const existing = reasoningDetailsAccumulator.get(key) if (existing) { @@ -195,6 +198,8 @@ export class RooHandler extends BaseOpenAiCompatibleProvider { existing.data = (existing.data || "") + detail.data } // Update other fields if provided + // Note: Don't update type - keep original type (e.g., reasoning.summary) + // even when encrypted data chunks arrive with type reasoning.encrypted if (detail.id !== undefined) existing.id = detail.id if (detail.format !== undefined) existing.format = detail.format if (detail.signature !== undefined) existing.signature = detail.signature diff --git a/src/api/providers/utils/response-render-config.ts b/src/api/providers/utils/response-render-config.ts index d1e10d8b131..a9c87241583 100644 --- a/src/api/providers/utils/response-render-config.ts +++ b/src/api/providers/utils/response-render-config.ts @@ -4,19 +4,19 @@ import { isJetbrainsPlatform } from "../../../utils/platform" export const renderModes = { noLimit: { limit: 0, - interval: 16, + interval: 10, }, fast: { limit: 1, - interval: 50, + interval: 25, }, medium: { limit: 5, - interval: 100, + interval: 50, }, slow: { limit: 10, - interval: 150, + interval: 100, }, } diff --git a/src/api/providers/zai.ts b/src/api/providers/zai.ts index 25074f5f0b9..c7bf6d635e8 100644 --- a/src/api/providers/zai.ts +++ b/src/api/providers/zai.ts @@ -1,3 +1,6 @@ +import { Anthropic } from "@anthropic-ai/sdk" +import OpenAI from "openai" + import { internationalZAiModels, mainlandZAiModels, @@ -8,10 +11,17 @@ import { zaiApiLineConfigs, } from "@roo-code/types" -import type { ApiHandlerOptions } from "../../shared/api" +import { type ApiHandlerOptions, getModelMaxOutputTokens, shouldUseReasoningEffort } from "../../shared/api" +import { convertToZAiFormat } from "../transform/zai-format" +import type { ApiHandlerCreateMessageMetadata } from "../index" import { BaseOpenAiCompatibleProvider } from "./base-openai-compatible-provider" +// Custom interface for Z.ai 
params to support thinking mode +type ZAiChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParamsStreaming & { + thinking?: { type: "enabled" | "disabled" } +} + export class ZAiHandler extends BaseOpenAiCompatibleProvider { constructor(options: ApiHandlerOptions) { const isChina = zaiApiLineConfigs[options.zaiApiLine ?? "international_coding"].isChina @@ -28,4 +38,76 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider { defaultTemperature: ZAI_DEFAULT_TEMPERATURE, }) } + + /** + * Override createStream to handle GLM-4.7's thinking mode. + * GLM-4.7 has thinking enabled by default in the API, so we need to + * explicitly send { type: "disabled" } when the user turns off reasoning. + */ + protected override createStream( + systemPrompt: string, + messages: Anthropic.Messages.MessageParam[], + metadata?: ApiHandlerCreateMessageMetadata, + requestOptions?: OpenAI.RequestOptions, + ) { + const { id: modelId, info } = this.getModel() + + // Check if this is a GLM-4.7 model with thinking support + const isThinkingModel = modelId === "glm-4.7" && Array.isArray(info.supportsReasoningEffort) + + if (isThinkingModel) { + // For GLM-4.7, thinking is ON by default in the API. + // We need to explicitly disable it when reasoning is off. 
+ const useReasoning = shouldUseReasoningEffort({ model: info, settings: this.options }) + + // Create the stream with our custom thinking parameter + return this.createStreamWithThinking(systemPrompt, messages, metadata, useReasoning) + } + + // For non-thinking models, use the default behavior + return super.createStream(systemPrompt, messages, metadata, requestOptions) + } + + /** + * Creates a stream with explicit thinking control for GLM-4.7 + */ + private createStreamWithThinking( + systemPrompt: string, + messages: Anthropic.Messages.MessageParam[], + metadata?: ApiHandlerCreateMessageMetadata, + useReasoning?: boolean, + ) { + const { id: model, info } = this.getModel() + + const max_tokens = + getModelMaxOutputTokens({ + modelId: model, + model: info, + settings: this.options, + format: "openai", + }) ?? undefined + + const temperature = this.options.modelTemperature ?? this.defaultTemperature + + // Use Z.ai format to preserve reasoning_content and merge post-tool text into tool messages + const convertedMessages = convertToZAiFormat(messages, { mergeToolResultText: true }) + + const params: ZAiChatCompletionParams = { + model, + max_tokens, + temperature, + messages: [{ role: "system", content: systemPrompt }, ...convertedMessages], + stream: true, + stream_options: { include_usage: true }, + // For GLM-4.7: thinking is ON by default, so we explicitly disable when needed + thinking: useReasoning ? { type: "enabled" } : { type: "disabled" }, + ...(metadata?.tools && { tools: this.convertToolsForOpenAI(metadata.tools) }), + ...(metadata?.tool_choice && { tool_choice: metadata.tool_choice }), + ...(metadata?.toolProtocol === "native" && { + parallel_tool_calls: metadata.parallelToolCalls ?? 
false, + }), + } + + return this.client.chat.completions.create(params) + } } diff --git a/src/api/transform/__tests__/minimax-format.spec.ts b/src/api/transform/__tests__/minimax-format.spec.ts new file mode 100644 index 00000000000..271dfb51052 --- /dev/null +++ b/src/api/transform/__tests__/minimax-format.spec.ts @@ -0,0 +1,336 @@ +// npx vitest run api/transform/__tests__/minimax-format.spec.ts + +import { Anthropic } from "@anthropic-ai/sdk" + +import { mergeEnvironmentDetailsForMiniMax } from "../minimax-format" + +describe("mergeEnvironmentDetailsForMiniMax", () => { + it("should pass through simple text messages unchanged", () => { + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: "Hello", + }, + { + role: "assistant", + content: "Hi there!", + }, + ] + + const result = mergeEnvironmentDetailsForMiniMax(messages) + + expect(result).toHaveLength(2) + expect(result).toEqual(messages) + }) + + it("should pass through user messages with only tool_result blocks unchanged", () => { + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: "Tool result content", + }, + ], + }, + ] + + const result = mergeEnvironmentDetailsForMiniMax(messages) + + expect(result).toHaveLength(1) + expect(result).toEqual(messages) + }) + + it("should pass through user messages with only text blocks unchanged", () => { + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { + type: "text", + text: "Some user message", + }, + ], + }, + ] + + const result = mergeEnvironmentDetailsForMiniMax(messages) + + expect(result).toHaveLength(1) + expect(result).toEqual(messages) + }) + + it("should merge text content into last tool_result when both tool_result AND text blocks exist", () => { + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: 
"tool-123", + content: "Tool result content", + }, + { + type: "text", + text: "\nCurrent Time: 2024-01-01\n", + }, + ], + }, + ] + + const result = mergeEnvironmentDetailsForMiniMax(messages) + + // The message should have only tool_result with merged content + expect(result).toHaveLength(1) + expect(result[0].role).toBe("user") + const content = result[0].content as Anthropic.Messages.ToolResultBlockParam[] + expect(content).toHaveLength(1) + expect(content[0].type).toBe("tool_result") + expect(content[0].tool_use_id).toBe("tool-123") + expect(content[0].content).toBe( + "Tool result content\n\n\nCurrent Time: 2024-01-01\n", + ) + }) + + it("should merge multiple text blocks into last tool_result", () => { + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: "Tool result 1", + }, + { + type: "text", + text: "First text block", + }, + { + type: "tool_result", + tool_use_id: "tool-456", + content: "Tool result 2", + }, + { + type: "text", + text: "Second text block", + }, + ], + }, + ] + + const result = mergeEnvironmentDetailsForMiniMax(messages) + + // The message should have only tool_result blocks, with text merged into the last one + expect(result).toHaveLength(1) + const content = result[0].content as Anthropic.Messages.ToolResultBlockParam[] + expect(content).toHaveLength(2) + expect(content[0].type).toBe("tool_result") + expect(content[0].content).toBe("Tool result 1") // First one unchanged + expect(content[1].type).toBe("tool_result") + expect(content[1].content).toBe("Tool result 2\n\nFirst text block\n\nSecond text block") // Second has merged text + }) + + it("should NOT merge text when images are present (cannot move images to tool_result)", () => { + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: "Tool result content", + }, + { + type: "text", + 
text: "Some text", + }, + { + type: "image", + source: { + type: "base64", + media_type: "image/png", + data: "base64data", + }, + }, + ], + }, + ] + + const result = mergeEnvironmentDetailsForMiniMax(messages) + + // Message should be unchanged since images are present + expect(result).toHaveLength(1) + expect(result).toEqual(messages) + }) + + it("should pass through assistant messages unchanged", () => { + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "assistant", + content: [ + { + type: "text", + text: "I will help you with that.", + }, + { + type: "tool_use", + id: "tool-123", + name: "read_file", + input: { path: "test.ts" }, + }, + ], + }, + ] + + const result = mergeEnvironmentDetailsForMiniMax(messages) + + expect(result).toHaveLength(1) + expect(result).toEqual(messages) + }) + + it("should handle mixed conversation with merging only for eligible messages", () => { + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: "Create a file", + }, + { + role: "assistant", + content: [ + { + type: "text", + text: "I'll create the file.", + }, + { + type: "tool_use", + id: "tool-123", + name: "write_file", + input: { path: "test.ts", content: "// test" }, + }, + ], + }, + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: "File created successfully", + }, + { + type: "text", + text: "\nCurrent Time: 2024-01-01\n", + }, + ], + }, + { + role: "assistant", + content: "The file has been created.", + }, + ] + + const result = mergeEnvironmentDetailsForMiniMax(messages) + + // Should have all 4 messages + expect(result).toHaveLength(4) + + // First user message unchanged (simple string) + expect(result[0]).toEqual(messages[0]) + + // Assistant message unchanged + expect(result[1]).toEqual(messages[1]) + + // Third message should have tool_result with merged environment_details + const thirdMessage = result[2].content as Anthropic.Messages.ToolResultBlockParam[] + 
expect(thirdMessage).toHaveLength(1) + expect(thirdMessage[0].type).toBe("tool_result") + expect(thirdMessage[0].content).toContain("File created successfully") + expect(thirdMessage[0].content).toContain("environment_details") + + // Fourth message unchanged + expect(result[3]).toEqual(messages[3]) + }) + + it("should handle string content in user messages", () => { + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: "Just a string message", + }, + ] + + const result = mergeEnvironmentDetailsForMiniMax(messages) + + expect(result).toHaveLength(1) + expect(result).toEqual(messages) + }) + + it("should handle empty messages array", () => { + const messages: Anthropic.Messages.MessageParam[] = [] + + const result = mergeEnvironmentDetailsForMiniMax(messages) + + expect(result).toHaveLength(0) + }) + + it("should handle tool_result with array content", () => { + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: [ + { type: "text", text: "Part 1" }, + { type: "text", text: "Part 2" }, + ], + }, + { + type: "text", + text: "Context", + }, + ], + }, + ] + + const result = mergeEnvironmentDetailsForMiniMax(messages) + + expect(result).toHaveLength(1) + const content = result[0].content as Anthropic.Messages.ToolResultBlockParam[] + expect(content).toHaveLength(1) + expect(content[0].type).toBe("tool_result") + // Array content should be concatenated and then merged with text + expect(content[0].content).toBe("Part 1\nPart 2\n\nContext") + }) + + it("should handle tool_result with empty content", () => { + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: "", + }, + { + type: "text", + text: "Context", + }, + ], + }, + ] + + const result = mergeEnvironmentDetailsForMiniMax(messages) + + expect(result).toHaveLength(1) + const content = 
result[0].content as Anthropic.Messages.ToolResultBlockParam[] + expect(content).toHaveLength(1) + expect(content[0].type).toBe("tool_result") + expect(content[0].content).toBe("Context") + }) +}) diff --git a/src/api/transform/minimax-format.ts b/src/api/transform/minimax-format.ts new file mode 100644 index 00000000000..32a32a4437e --- /dev/null +++ b/src/api/transform/minimax-format.ts @@ -0,0 +1,118 @@ +import { Anthropic } from "@anthropic-ai/sdk" + +type ContentBlock = Anthropic.Messages.ContentBlockParam + +/** + * Merges text content (like environment_details) that follows tool_result blocks + * into the last tool_result's content. This preserves reasoning continuity for + * thinking models by avoiding separate user messages after tool results. + * + * Key behavior: + * - User messages with ONLY tool_result blocks: keep as-is + * - User messages with ONLY text/image: keep as-is + * - User messages with tool_result blocks AND text blocks: merge the text blocks + * into the last tool_result's content + * + * @param messages Array of Anthropic messages + * @returns Modified messages with text merged into tool_result content + */ +export function mergeEnvironmentDetailsForMiniMax( + messages: Anthropic.Messages.MessageParam[], +): Anthropic.Messages.MessageParam[] { + const result: Anthropic.Messages.MessageParam[] = [] + + for (const message of messages) { + if (message.role === "user") { + if (typeof message.content === "string") { + // Simple string content - keep as-is + result.push(message) + } else if (Array.isArray(message.content)) { + // Check if this message has both tool_result blocks and text blocks + const toolResultBlocks: Anthropic.Messages.ToolResultBlockParam[] = [] + const textBlocks: Anthropic.Messages.TextBlockParam[] = [] + const imageBlocks: Anthropic.Messages.ImageBlockParam[] = [] + + for (const block of message.content) { + if (block.type === "tool_result") { + toolResultBlocks.push(block) + } else if (block.type === "text") { + 
textBlocks.push(block) + } else if (block.type === "image") { + imageBlocks.push(block) + } + } + + // If we have tool_result blocks AND text blocks (like environment_details), + // merge the text into the last tool_result's content + const hasToolResults = toolResultBlocks.length > 0 + const hasTextBlocks = textBlocks.length > 0 + const hasImageBlocks = imageBlocks.length > 0 + + if (hasToolResults && hasTextBlocks && !hasImageBlocks) { + // Merge text content into the last tool_result + const textContent = textBlocks.map((b) => b.text).join("\n\n") + const modifiedToolResults = [...toolResultBlocks] + const lastToolResult = modifiedToolResults[modifiedToolResults.length - 1] + + // Get existing content as string + let existingContent: string + if (typeof lastToolResult.content === "string") { + existingContent = lastToolResult.content + } else if (Array.isArray(lastToolResult.content)) { + existingContent = + lastToolResult.content + ?.map((c) => { + if (c.type === "text") return c.text + if (c.type === "image") return "(image)" + return "" + }) + .join("\n") ?? "" + } else { + existingContent = "" + } + + // Merge text into the last tool_result + modifiedToolResults[modifiedToolResults.length - 1] = { + ...lastToolResult, + content: existingContent ? `${existingContent}\n\n${textContent}` : textContent, + } + + result.push({ + ...message, + content: modifiedToolResults as ContentBlock[], + }) + } else { + // Keep the message as-is if: + // - Only tool_result blocks (no text to merge) + // - Only text/image blocks (no tool results) + // - Has images (can't merge into tool_result) + result.push(message) + } + } else { + // Unknown format - keep as-is + result.push(message) + } + } else { + // Assistant messages - keep as-is + result.push(message) + } + } + + return result +} + +/** + * @deprecated Use mergeEnvironmentDetailsForMiniMax instead. 
This function extracted + * environment_details to the system prompt, but the new approach merges them into + * tool_result content like r1-format does with mergeToolResultText. + */ +export function extractEnvironmentDetailsForMiniMax(messages: Anthropic.Messages.MessageParam[]): { + messages: Anthropic.Messages.MessageParam[] + extractedSystemContent: string[] +} { + // For backwards compatibility, just return the merged messages with empty extracted content + return { + messages: mergeEnvironmentDetailsForMiniMax(messages), + extractedSystemContent: [], + } +} diff --git a/src/api/transform/openai-format.ts b/src/api/transform/openai-format.ts index 56d6441c48b..7ca4ddb993c 100644 --- a/src/api/transform/openai-format.ts +++ b/src/api/transform/openai-format.ts @@ -150,16 +150,28 @@ export function convertToOpenAiMessages( // Check if the message has reasoning_details (used by Gemini 3, etc.) const messageWithDetails = anthropicMessage as any - const baseMessage: OpenAI.Chat.ChatCompletionAssistantMessageParam = { + + // Build message with reasoning_details BEFORE tool_calls to preserve + // the order expected by providers like Roo. Property order matters + // when sending messages back to some APIs. + const baseMessage: OpenAI.Chat.ChatCompletionAssistantMessageParam & { reasoning_details?: any[] } = { role: "assistant", content, - // Cannot be an empty array. API expects an array with minimum length 1, and will respond with an error if it's empty - tool_calls: tool_calls.length > 0 ? 
tool_calls : undefined, } - // Preserve reasoning_details if present (will be processed by provider if needed) + // Add reasoning_details first (before tool_calls) to preserve provider-expected order + // Strip the id field from each reasoning detail as it's only used internally for accumulation if (messageWithDetails.reasoning_details && Array.isArray(messageWithDetails.reasoning_details)) { - ;(baseMessage as any).reasoning_details = messageWithDetails.reasoning_details + baseMessage.reasoning_details = messageWithDetails.reasoning_details.map((detail: any) => { + const { id, ...rest } = detail + return rest + }) + } + + // Add tool_calls after reasoning_details + // Cannot be an empty array. API expects an array with minimum length 1, and will respond with an error if it's empty + if (tool_calls.length > 0) { + baseMessage.tool_calls = tool_calls } openAiMessages.push(baseMessage) diff --git a/src/api/transform/zai-format.ts b/src/api/transform/zai-format.ts new file mode 100644 index 00000000000..79b2e88aeb2 --- /dev/null +++ b/src/api/transform/zai-format.ts @@ -0,0 +1,242 @@ +import { Anthropic } from "@anthropic-ai/sdk" +import OpenAI from "openai" + +type ContentPartText = OpenAI.Chat.ChatCompletionContentPartText +type ContentPartImage = OpenAI.Chat.ChatCompletionContentPartImage +type UserMessage = OpenAI.Chat.ChatCompletionUserMessageParam +type AssistantMessage = OpenAI.Chat.ChatCompletionAssistantMessageParam +type SystemMessage = OpenAI.Chat.ChatCompletionSystemMessageParam +type ToolMessage = OpenAI.Chat.ChatCompletionToolMessageParam +type Message = OpenAI.Chat.ChatCompletionMessageParam +type AnthropicMessage = Anthropic.Messages.MessageParam + +/** + * Extended assistant message type to support Z.ai's interleaved thinking. + * Z.ai's API returns reasoning_content alongside content and tool_calls, + * and requires it to be passed back in subsequent requests for preserved thinking. 
+ */ +export type ZAiAssistantMessage = AssistantMessage & { + reasoning_content?: string +} + +/** + * Converts Anthropic messages to OpenAI format optimized for Z.ai's GLM-4.7 thinking mode. + * + * Key differences from standard OpenAI format: + * - Preserves reasoning_content on assistant messages for interleaved thinking + * - Text content after tool_results (like environment_details) is merged into the last tool message + * to avoid creating user messages that would cause reasoning_content to be dropped + * + * @param messages Array of Anthropic messages + * @param options Optional configuration for message conversion + * @param options.mergeToolResultText If true, merge text content after tool_results into the last + * tool message instead of creating a separate user message. + * This is critical for Z.ai's interleaved thinking mode. + * @returns Array of OpenAI messages optimized for Z.ai's thinking mode + */ +export function convertToZAiFormat( + messages: AnthropicMessage[], + options?: { mergeToolResultText?: boolean }, +): Message[] { + const result: Message[] = [] + + for (const message of messages) { + // Check if the message has reasoning_content (for Z.ai interleaved thinking) + const messageWithReasoning = message as AnthropicMessage & { reasoning_content?: string } + const reasoningContent = messageWithReasoning.reasoning_content + + if (message.role === "user") { + // Handle user messages - may contain tool_result blocks + if (Array.isArray(message.content)) { + const textParts: string[] = [] + const imageParts: ContentPartImage[] = [] + const toolResults: { tool_use_id: string; content: string }[] = [] + + for (const part of message.content) { + if (part.type === "text") { + textParts.push(part.text) + } else if (part.type === "image") { + imageParts.push({ + type: "image_url", + image_url: { url: `data:${part.source.media_type};base64,${part.source.data}` }, + }) + } else if (part.type === "tool_result") { + // Convert tool_result to OpenAI tool 
message format + let content: string + if (typeof part.content === "string") { + content = part.content + } else if (Array.isArray(part.content)) { + content = + part.content + ?.map((c) => { + if (c.type === "text") return c.text + if (c.type === "image") return "(image)" + return "" + }) + .join("\n") ?? "" + } else { + content = "" + } + toolResults.push({ + tool_use_id: part.tool_use_id, + content, + }) + } + } + + // Add tool messages first (they must follow assistant tool_use) + for (const toolResult of toolResults) { + const toolMessage: ToolMessage = { + role: "tool", + tool_call_id: toolResult.tool_use_id, + content: toolResult.content, + } + result.push(toolMessage) + } + + // Handle text/image content after tool results + if (textParts.length > 0 || imageParts.length > 0) { + // For Z.ai interleaved thinking: when mergeToolResultText is enabled and we have + // tool results followed by text, merge the text into the last tool message to avoid + // creating a user message that would cause reasoning_content to be dropped. + // This is critical because Z.ai drops all reasoning_content when it sees a user message. 
+ const shouldMergeIntoToolMessage = + options?.mergeToolResultText && toolResults.length > 0 && imageParts.length === 0 + + if (shouldMergeIntoToolMessage) { + // Merge text content into the last tool message + const lastToolMessage = result[result.length - 1] as ToolMessage + if (lastToolMessage?.role === "tool") { + const additionalText = textParts.join("\n") + lastToolMessage.content = `${lastToolMessage.content}\n\n${additionalText}` + } + } else { + // Standard behavior: add user message with text/image content + let content: UserMessage["content"] + if (imageParts.length > 0) { + const parts: (ContentPartText | ContentPartImage)[] = [] + if (textParts.length > 0) { + parts.push({ type: "text", text: textParts.join("\n") }) + } + parts.push(...imageParts) + content = parts + } else { + content = textParts.join("\n") + } + + // Check if we can merge with the last message + const lastMessage = result[result.length - 1] + if (lastMessage?.role === "user") { + // Merge with existing user message + if (typeof lastMessage.content === "string" && typeof content === "string") { + lastMessage.content += `\n${content}` + } else { + const lastContent = Array.isArray(lastMessage.content) + ? lastMessage.content + : [{ type: "text" as const, text: lastMessage.content || "" }] + const newContent = Array.isArray(content) + ? 
content + : [{ type: "text" as const, text: content }] + lastMessage.content = [...lastContent, ...newContent] as UserMessage["content"] + } + } else { + result.push({ role: "user", content }) + } + } + } + } else { + // Simple string content + const lastMessage = result[result.length - 1] + if (lastMessage?.role === "user") { + if (typeof lastMessage.content === "string") { + lastMessage.content += `\n${message.content}` + } else { + ;(lastMessage.content as (ContentPartText | ContentPartImage)[]).push({ + type: "text", + text: message.content, + }) + } + } else { + result.push({ role: "user", content: message.content }) + } + } + } else if (message.role === "assistant") { + // Handle assistant messages - may contain tool_use blocks and reasoning blocks + if (Array.isArray(message.content)) { + const textParts: string[] = [] + const toolCalls: OpenAI.Chat.ChatCompletionMessageToolCall[] = [] + let extractedReasoning: string | undefined + + for (const part of message.content) { + if (part.type === "text") { + textParts.push(part.text) + } else if (part.type === "tool_use") { + toolCalls.push({ + id: part.id, + type: "function", + function: { + name: part.name, + arguments: JSON.stringify(part.input), + }, + }) + } else if ((part as any).type === "reasoning" && (part as any).text) { + // Extract reasoning from content blocks (Task stores it this way) + extractedReasoning = (part as any).text + } + } + + // Use reasoning from content blocks if not provided at top level + const finalReasoning = reasoningContent || extractedReasoning + + const assistantMessage: ZAiAssistantMessage = { + role: "assistant", + content: textParts.length > 0 ? 
textParts.join("\n") : null, + ...(toolCalls.length > 0 && { tool_calls: toolCalls }), + // Preserve reasoning_content for Z.ai interleaved thinking + ...(finalReasoning && { reasoning_content: finalReasoning }), + } + + // Check if we can merge with the last message (only if no tool calls) + const lastMessage = result[result.length - 1] + if (lastMessage?.role === "assistant" && !toolCalls.length && !(lastMessage as any).tool_calls) { + // Merge text content + if (typeof lastMessage.content === "string" && typeof assistantMessage.content === "string") { + lastMessage.content += `\n${assistantMessage.content}` + } else if (assistantMessage.content) { + const lastContent = lastMessage.content || "" + lastMessage.content = `${lastContent}\n${assistantMessage.content}` + } + // Preserve reasoning_content from the new message if present + if (finalReasoning) { + ;(lastMessage as ZAiAssistantMessage).reasoning_content = finalReasoning + } + } else { + result.push(assistantMessage) + } + } else { + // Simple string content + const lastMessage = result[result.length - 1] + if (lastMessage?.role === "assistant" && !(lastMessage as any).tool_calls) { + if (typeof lastMessage.content === "string") { + lastMessage.content += `\n${message.content}` + } else { + lastMessage.content = message.content + } + // Preserve reasoning_content from the new message if present + if (reasoningContent) { + ;(lastMessage as ZAiAssistantMessage).reasoning_content = reasoningContent + } + } else { + const assistantMessage: ZAiAssistantMessage = { + role: "assistant", + content: message.content, + ...(reasoningContent && { reasoning_content: reasoningContent }), + } + result.push(assistantMessage) + } + } + } + } + + return result +} diff --git a/src/core/condense/__tests__/condense.spec.ts b/src/core/condense/__tests__/condense.spec.ts index 2558ee5b33a..bea7d50ac17 100644 --- a/src/core/condense/__tests__/condense.spec.ts +++ b/src/core/condense/__tests__/condense.spec.ts @@ -86,7 +86,19 @@ 
describe("Condense", () => { // Verify we have a summary message const summaryMessage = result.messages.find((msg) => msg.isSummary) expect(summaryMessage).toBeTruthy() - expect(summaryMessage?.content).toBe("Mock summary of the conversation") + // Summary content is now always an array with a synthetic reasoning block + text block + // for DeepSeek-reasoner compatibility + expect(Array.isArray(summaryMessage?.content)).toBe(true) + const contentArray = summaryMessage?.content as Anthropic.Messages.ContentBlockParam[] + expect(contentArray).toHaveLength(2) + expect(contentArray[0]).toEqual({ + type: "reasoning", + text: "Condensing conversation context. The summary below captures the key information from the prior conversation.", + }) + expect(contentArray[1]).toEqual({ + type: "text", + text: "Mock summary of the conversation", + }) // With non-destructive condensing, all messages are retained (tagged but not deleted) // Use getEffectiveApiHistory to verify the effective view matches the old behavior diff --git a/src/core/condense/__tests__/index.spec.ts b/src/core/condense/__tests__/index.spec.ts index fe17b09ee37..1309afb2213 100644 --- a/src/core/condense/__tests__/index.spec.ts +++ b/src/core/condense/__tests__/index.spec.ts @@ -246,6 +246,94 @@ describe("getKeepMessagesWithToolBlocks", () => { expect(result.keepMessages).toEqual(messages) expect(result.toolUseBlocksToPreserve).toHaveLength(0) }) + + it("should preserve reasoning blocks alongside tool_use blocks for DeepSeek/Z.ai interleaved thinking", () => { + const reasoningBlock = { + type: "reasoning" as const, + text: "Let me think about this step by step...", + } + const toolUseBlock = { + type: "tool_use" as const, + id: "toolu_deepseek_123", + name: "read_file", + input: { path: "test.txt" }, + } + const toolResultBlock = { + type: "tool_result" as const, + tool_use_id: "toolu_deepseek_123", + content: "file contents", + } + + const messages: ApiMessage[] = [ + { role: "user", content: "Hello", ts: 1 
}, + { role: "assistant", content: "Let me help", ts: 2 }, + { role: "user", content: "Please read the file", ts: 3 }, + { + role: "assistant", + // DeepSeek stores reasoning as content blocks alongside tool_use + content: [reasoningBlock as any, { type: "text" as const, text: "Reading file..." }, toolUseBlock], + ts: 4, + }, + { + role: "user", + content: [toolResultBlock, { type: "text" as const, text: "Continue" }], + ts: 5, + }, + { role: "assistant", content: "Got it, the file says...", ts: 6 }, + { role: "user", content: "Thanks", ts: 7 }, + ] + + const result = getKeepMessagesWithToolBlocks(messages, 3) + + // keepMessages should be the last 3 messages + expect(result.keepMessages).toHaveLength(3) + expect(result.keepMessages[0].ts).toBe(5) + + // Should preserve the tool_use block + expect(result.toolUseBlocksToPreserve).toHaveLength(1) + expect(result.toolUseBlocksToPreserve[0]).toEqual(toolUseBlock) + + // Should preserve the reasoning block for DeepSeek/Z.ai interleaved thinking + expect(result.reasoningBlocksToPreserve).toHaveLength(1) + expect((result.reasoningBlocksToPreserve[0] as any).type).toBe("reasoning") + expect((result.reasoningBlocksToPreserve[0] as any).text).toBe("Let me think about this step by step...") + }) + + it("should return empty reasoningBlocksToPreserve when no reasoning blocks present", () => { + const toolUseBlock = { + type: "tool_use" as const, + id: "toolu_123", + name: "read_file", + input: { path: "test.txt" }, + } + const toolResultBlock = { + type: "tool_result" as const, + tool_use_id: "toolu_123", + content: "file contents", + } + + const messages: ApiMessage[] = [ + { role: "user", content: "Hello", ts: 1 }, + { + role: "assistant", + // No reasoning block, just text and tool_use + content: [{ type: "text" as const, text: "Reading file..." 
}, toolUseBlock], + ts: 2, + }, + { + role: "user", + content: [toolResultBlock], + ts: 3, + }, + { role: "assistant", content: "Done", ts: 4 }, + { role: "user", content: "Thanks", ts: 5 }, + ] + + const result = getKeepMessagesWithToolBlocks(messages, 3) + + expect(result.toolUseBlocksToPreserve).toHaveLength(1) + expect(result.reasoningBlocksToPreserve).toHaveLength(0) + }) }) describe("getMessagesSinceLastSummary", () => { @@ -422,7 +510,14 @@ describe("summarizeConversation", () => { const summaryMessage = result.messages.find((m) => m.isSummary) expect(summaryMessage).toBeDefined() expect(summaryMessage!.role).toBe("assistant") - expect(summaryMessage!.content).toBe("This is a summary") + // Summary content is now always an array with [synthetic reasoning, text] + // for DeepSeek-reasoner compatibility (requires reasoning_content on all assistant messages) + expect(Array.isArray(summaryMessage!.content)).toBe(true) + const content = summaryMessage!.content as any[] + expect(content).toHaveLength(2) + expect(content[0].type).toBe("reasoning") + expect(content[1].type).toBe("text") + expect(content[1].text).toBe("This is a summary") expect(summaryMessage!.isSummary).toBe(true) // Verify that the effective API history matches expected: first + summary + last N messages @@ -827,14 +922,16 @@ describe("summarizeConversation", () => { expect(summaryMessage!.isSummary).toBe(true) expect(Array.isArray(summaryMessage!.content)).toBe(true) - // Content should be [text block, tool_use block] + // Content should be [synthetic reasoning, text block, tool_use block] + // The synthetic reasoning is always added for DeepSeek-reasoner compatibility const content = summaryMessage!.content as Anthropic.Messages.ContentBlockParam[] - expect(content).toHaveLength(2) - expect(content[0].type).toBe("text") - expect((content[0] as Anthropic.Messages.TextBlockParam).text).toBe("Summary of conversation") - expect(content[1].type).toBe("tool_use") - expect((content[1] as 
Anthropic.Messages.ToolUseBlockParam).id).toBe("toolu_123") - expect((content[1] as Anthropic.Messages.ToolUseBlockParam).name).toBe("read_file") + expect(content).toHaveLength(3) + expect((content[0] as any).type).toBe("reasoning") // Synthetic reasoning for DeepSeek + expect(content[1].type).toBe("text") + expect((content[1] as Anthropic.Messages.TextBlockParam).text).toBe("Summary of conversation") + expect(content[2].type).toBe("tool_use") + expect((content[2] as Anthropic.Messages.ToolUseBlockParam).id).toBe("toolu_123") + expect((content[2] as Anthropic.Messages.ToolUseBlockParam).name).toBe("read_file") // With non-destructive condensing, all messages are retained plus the summary expect(result.messages.length).toBe(messages.length + 1) // all original + summary @@ -981,7 +1078,10 @@ describe("summarizeConversation", () => { expect(summaryMessage).toBeDefined() expect(Array.isArray(summaryMessage!.content)).toBe(true) const summaryContent = summaryMessage!.content as Anthropic.Messages.ContentBlockParam[] - expect(summaryContent[0]).toEqual({ type: "text", text: "This is a summary" }) + // First block is synthetic reasoning for DeepSeek-reasoner compatibility + expect((summaryContent[0] as any).type).toBe("reasoning") + // Second block is the text summary + expect(summaryContent[1]).toEqual({ type: "text", text: "This is a summary" }) const preservedToolUses = summaryContent.filter( (block): block is Anthropic.Messages.ToolUseBlockParam => block.type === "tool_use", @@ -989,6 +1089,153 @@ describe("summarizeConversation", () => { expect(preservedToolUses).toHaveLength(2) expect(preservedToolUses.map((block) => block.id)).toEqual(["toolu_parallel_1", "toolu_parallel_2"]) }) + + it("should preserve reasoning blocks in summary message for DeepSeek/Z.ai interleaved thinking", async () => { + const reasoningBlock = { + type: "reasoning" as const, + text: "Let me think about this step by step...", + } + const toolUseBlock = { + type: "tool_use" as const, + id: 
"toolu_deepseek_reason", + name: "read_file", + input: { path: "test.txt" }, + } + const toolResultBlock = { + type: "tool_result" as const, + tool_use_id: "toolu_deepseek_reason", + content: "file contents", + } + + const messages: ApiMessage[] = [ + { role: "user", content: "Hello", ts: 1 }, + { role: "assistant", content: "Let me help", ts: 2 }, + { role: "user", content: "Please read the file", ts: 3 }, + { + role: "assistant", + // DeepSeek stores reasoning as content blocks alongside tool_use + content: [reasoningBlock as any, { type: "text" as const, text: "Reading file..." }, toolUseBlock], + ts: 4, + }, + { + role: "user", + content: [toolResultBlock, { type: "text" as const, text: "Continue" }], + ts: 5, + }, + { role: "assistant", content: "Got it, the file says...", ts: 6 }, + { role: "user", content: "Thanks", ts: 7 }, + ] + + // Create a stream with usage information + const streamWithUsage = (async function* () { + yield { type: "text" as const, text: "Summary of conversation" } + yield { type: "usage" as const, totalCost: 0.05, outputTokens: 100 } + })() + + mockApiHandler.createMessage = vi.fn().mockReturnValue(streamWithUsage) as any + mockApiHandler.countTokens = vi.fn().mockImplementation(() => Promise.resolve(50)) as any + + const result = await summarizeConversation( + messages, + mockApiHandler, + defaultSystemPrompt, + taskId, + DEFAULT_PREV_CONTEXT_TOKENS, + false, // isAutomaticTrigger + undefined, // customCondensingPrompt + undefined, // condensingApiHandler + true, // useNativeTools - required for tool_use block preservation + ) + + // Find the summary message + const summaryMessage = result.messages.find((m) => m.isSummary) + expect(summaryMessage).toBeDefined() + expect(summaryMessage!.role).toBe("assistant") + expect(summaryMessage!.isSummary).toBe(true) + expect(Array.isArray(summaryMessage!.content)).toBe(true) + + // Content should be [synthetic reasoning, preserved reasoning, text block, tool_use block] + // - Synthetic reasoning 
is always added for DeepSeek-reasoner compatibility + // - Preserved reasoning from the condensed assistant message + // This order ensures reasoning_content is always present for DeepSeek/Z.ai + const content = summaryMessage!.content as Anthropic.Messages.ContentBlockParam[] + expect(content).toHaveLength(4) + + // First block should be synthetic reasoning + expect((content[0] as any).type).toBe("reasoning") + expect((content[0] as any).text).toContain("Condensing conversation context") + + // Second block should be preserved reasoning from the condensed message + expect((content[1] as any).type).toBe("reasoning") + expect((content[1] as any).text).toBe("Let me think about this step by step...") + + // Third block should be text (the summary) + expect(content[2].type).toBe("text") + expect((content[2] as Anthropic.Messages.TextBlockParam).text).toBe("Summary of conversation") + + // Fourth block should be tool_use + expect(content[3].type).toBe("tool_use") + expect((content[3] as Anthropic.Messages.ToolUseBlockParam).id).toBe("toolu_deepseek_reason") + + expect(result.error).toBeUndefined() + }) + + it("should include synthetic reasoning block in summary for DeepSeek-reasoner compatibility even without tool_use blocks", async () => { + // This test verifies the fix for the DeepSeek-reasoner 400 error: + // "Missing `reasoning_content` field in the assistant message at message index 1" + // DeepSeek-reasoner requires reasoning_content on ALL assistant messages, not just those with tool_calls. + // After condensation, the summary becomes an assistant message that needs reasoning_content. 
+ const messages: ApiMessage[] = [ + { role: "user", content: "Tell me a joke", ts: 1 }, + { role: "assistant", content: "Why did the programmer quit?", ts: 2 }, + { role: "user", content: "I don't know, why?", ts: 3 }, + { role: "assistant", content: "He didn't get arrays!", ts: 4 }, + { role: "user", content: "Another one please", ts: 5 }, + { role: "assistant", content: "Why do programmers prefer dark mode?", ts: 6 }, + { role: "user", content: "Why?", ts: 7 }, + ] + + // Create a stream with usage information (no tool calls in this conversation) + const streamWithUsage = (async function* () { + yield { type: "text" as const, text: "Summary: User requested jokes." } + yield { type: "usage" as const, totalCost: 0.05, outputTokens: 100 } + })() + + mockApiHandler.createMessage = vi.fn().mockReturnValue(streamWithUsage) as any + mockApiHandler.countTokens = vi.fn().mockImplementation(() => Promise.resolve(50)) as any + + const result = await summarizeConversation( + messages, + mockApiHandler, + defaultSystemPrompt, + taskId, + DEFAULT_PREV_CONTEXT_TOKENS, + false, // isAutomaticTrigger + undefined, // customCondensingPrompt + undefined, // condensingApiHandler + false, // useNativeTools - not using tools in this test + ) + + // Find the summary message + const summaryMessage = result.messages.find((m) => m.isSummary) + expect(summaryMessage).toBeDefined() + expect(summaryMessage!.role).toBe("assistant") + expect(summaryMessage!.isSummary).toBe(true) + + // CRITICAL: Content must be an array with a synthetic reasoning block + // This is required for DeepSeek-reasoner which needs reasoning_content on all assistant messages + expect(Array.isArray(summaryMessage!.content)).toBe(true) + const content = summaryMessage!.content as any[] + + // Should have [synthetic reasoning, text] + expect(content).toHaveLength(2) + expect(content[0].type).toBe("reasoning") + expect(content[0].text).toContain("Condensing conversation context") + expect(content[1].type).toBe("text") + 
expect(content[1].text).toBe("Summary: User requested jokes.") + + expect(result.error).toBeUndefined() + }) }) describe("summarizeConversation with custom settings", () => { diff --git a/src/core/condense/index.ts b/src/core/condense/index.ts index b8af4d1de24..3238eca7071 100644 --- a/src/core/condense/index.ts +++ b/src/core/condense/index.ts @@ -30,22 +30,49 @@ function getToolUseBlocks(message: ApiMessage): Anthropic.Messages.ToolUseBlock[ return message.content.filter((block) => block.type === "tool_use") as Anthropic.Messages.ToolUseBlock[] } +/** + * Gets reasoning blocks from a message's content array. + * Task stores reasoning as {type: "reasoning", text: "..."} blocks, + * which convertToR1Format and convertToZAiFormat already know how to extract. + */ +function getReasoningBlocks(message: ApiMessage): Anthropic.Messages.ContentBlockParam[] { + if (message.role !== "assistant" || typeof message.content === "string") { + return [] + } + // Filter for reasoning blocks and cast to ContentBlockParam (the type field is compatible) + return message.content.filter((block) => (block as any).type === "reasoning") as any[] +} + +/** + * Result of getKeepMessagesWithToolBlocks + */ +export type KeepMessagesResult = { + keepMessages: ApiMessage[] + toolUseBlocksToPreserve: Anthropic.Messages.ToolUseBlock[] + // Reasoning blocks from the preceding assistant message, needed for DeepSeek/Z.ai + // when tool_use blocks are preserved. Task stores reasoning as {type: "reasoning", text: "..."} + // blocks, and convertToR1Format/convertToZAiFormat already extract these. + reasoningBlocksToPreserve: Anthropic.Messages.ContentBlockParam[] +} + /** * Extracts tool_use blocks that need to be preserved to match tool_result blocks in keepMessages. * When the first kept message is a user message with tool_result blocks, * we need to find the corresponding tool_use blocks from the preceding assistant message. 
* These tool_use blocks will be appended to the summary message to maintain proper pairing. * + * Also extracts reasoning blocks from the preceding assistant message, which are required + * by DeepSeek and Z.ai for interleaved thinking mode. Without these, the API returns a 400 error + * "Missing reasoning_content field in the assistant message". + * See: https://api-docs.deepseek.com/guides/thinking_mode#tool-calls + * * @param messages - The full conversation messages * @param keepCount - The number of messages to keep from the end - * @returns Object containing keepMessages and any tool_use blocks to preserve + * @returns Object containing keepMessages, tool_use blocks, and reasoning blocks to preserve */ -export function getKeepMessagesWithToolBlocks( - messages: ApiMessage[], - keepCount: number, -): { keepMessages: ApiMessage[]; toolUseBlocksToPreserve: Anthropic.Messages.ToolUseBlock[] } { +export function getKeepMessagesWithToolBlocks(messages: ApiMessage[], keepCount: number): KeepMessagesResult { if (messages.length <= keepCount) { - return { keepMessages: messages, toolUseBlocksToPreserve: [] } + return { keepMessages: messages, toolUseBlocksToPreserve: [], reasoningBlocksToPreserve: [] } } const startIndex = messages.length - keepCount @@ -59,13 +86,20 @@ export function getKeepMessagesWithToolBlocks( const precedingMessage = messages[precedingIndex] const toolUseBlocks = getToolUseBlocks(precedingMessage) if (toolUseBlocks.length > 0) { - // Return the tool_use blocks to be merged into the summary message - return { keepMessages, toolUseBlocksToPreserve: toolUseBlocks } + // Also extract reasoning blocks for DeepSeek/Z.ai interleaved thinking + // Task stores reasoning as {type: "reasoning", text: "..."} content blocks + const reasoningBlocks = getReasoningBlocks(precedingMessage) + // Return the tool_use blocks and reasoning blocks to be merged into the summary message + return { + keepMessages, + toolUseBlocksToPreserve: toolUseBlocks, + 
reasoningBlocksToPreserve: reasoningBlocks, + } } } } - return { keepMessages, toolUseBlocksToPreserve: [] } + return { keepMessages, toolUseBlocksToPreserve: [], reasoningBlocksToPreserve: [] } } export const N_MESSAGES_TO_KEEP = 3 @@ -168,11 +202,15 @@ export async function summarizeConversation( // Always preserve the first message (which may contain slash command content) const firstMessage = messages[0] - // Get keepMessages and any tool_use blocks that need to be preserved for tool_result pairing - // Only preserve tool_use blocks when using native tools protocol (XML protocol doesn't need them) - const { keepMessages, toolUseBlocksToPreserve } = useNativeTools + // Get keepMessages and any tool_use/reasoning blocks that need to be preserved for tool_result pairing + // Only preserve these blocks when using native tools protocol (XML protocol doesn't need them) + const { keepMessages, toolUseBlocksToPreserve, reasoningBlocksToPreserve } = useNativeTools ? getKeepMessagesWithToolBlocks(messages, N_MESSAGES_TO_KEEP) - : { keepMessages: messages.slice(-N_MESSAGES_TO_KEEP), toolUseBlocksToPreserve: [] } + : { + keepMessages: messages.slice(-N_MESSAGES_TO_KEEP), + toolUseBlocksToPreserve: [], + reasoningBlocksToPreserve: [], + } const keepStartIndex = Math.max(messages.length - N_MESSAGES_TO_KEEP, 0) const includeFirstKeptMessageInSummary = toolUseBlocksToPreserve.length > 0 @@ -257,15 +295,39 @@ export async function summarizeConversation( } // Build the summary message content - // If there are tool_use blocks to preserve (for tool_result pairing), append them to the summary - let summaryContent: string | Anthropic.Messages.ContentBlockParam[] + // CRITICAL: Always include a reasoning block in the summary for DeepSeek-reasoner compatibility. + // DeepSeek-reasoner requires `reasoning_content` on ALL assistant messages, not just those with tool_calls. 
+ // Without this, we get: "400 Missing `reasoning_content` field in the assistant message" + // See: https://api-docs.deepseek.com/guides/thinking_mode + // + // The summary content structure is: + // 1. Synthetic reasoning block (always present) - for DeepSeek-reasoner compatibility + // 2. Any preserved reasoning blocks from the condensed assistant message (if tool_use blocks are preserved) + // 3. Text block with the summary + // 4. Tool_use blocks (if any need to be preserved for tool_result pairing) + + // Create a synthetic reasoning block that explains the summary + // This is minimal but satisfies DeepSeek's requirement for reasoning_content on all assistant messages + const syntheticReasoningBlock = { + type: "reasoning" as const, + text: "Condensing conversation context. The summary below captures the key information from the prior conversation.", + } + + const textBlock: Anthropic.Messages.TextBlockParam = { type: "text", text: summary } + + let summaryContent: Anthropic.Messages.ContentBlockParam[] if (toolUseBlocksToPreserve.length > 0) { - // Create content array with text block followed by tool_use blocks - // Use TextBlockParam which doesn't require citations field - const textBlock: Anthropic.Messages.TextBlockParam = { type: "text", text: summary } - summaryContent = [textBlock, ...toolUseBlocksToPreserve] + // Include: synthetic reasoning, preserved reasoning (if any), summary text, and tool_use blocks + summaryContent = [ + syntheticReasoningBlock as unknown as Anthropic.Messages.ContentBlockParam, + ...reasoningBlocksToPreserve, + textBlock, + ...toolUseBlocksToPreserve, + ] } else { - summaryContent = summary + // Include: synthetic reasoning and summary text + // This ensures the summary always has reasoning_content for DeepSeek-reasoner + summaryContent = [syntheticReasoningBlock as unknown as Anthropic.Messages.ContentBlockParam, textBlock] } // Generate a unique condenseId for this summary diff --git 
a/src/core/task-persistence/apiMessages.ts b/src/core/task-persistence/apiMessages.ts index 1366ce6b787..c05538db6a4 100644 --- a/src/core/task-persistence/apiMessages.ts +++ b/src/core/task-persistence/apiMessages.ts @@ -20,6 +20,9 @@ export type ApiMessage = Anthropic.MessageParam & { text?: string // For OpenRouter reasoning_details array format (used by Gemini 3, etc.) reasoning_details?: any[] + // For DeepSeek/Z.ai interleaved thinking: reasoning_content that must be preserved during tool call sequences + // See: https://api-docs.deepseek.com/guides/thinking_mode#tool-calls + reasoning_content?: string // For non-destructive condense: unique identifier for summary messages condenseId?: string // For non-destructive condense: points to the condenseId of the summary that replaces this message diff --git a/src/core/tools/SearchAndReplaceTool.ts b/src/core/tools/SearchAndReplaceTool.ts index 49d159f4557..7d03a6a22c3 100644 --- a/src/core/tools/SearchAndReplaceTool.ts +++ b/src/core/tools/SearchAndReplaceTool.ts @@ -111,6 +111,8 @@ export class SearchAndReplaceTool extends BaseTool<"search_and_replace"> { let fileContent: string try { fileContent = await fs.readFile(absolutePath, "utf8") + // Normalize line endings to LF for consistent matching + fileContent = fileContent.replace(/\r\n/g, "\n") } catch (error) { task.consecutiveMistakeCount++ task.recordToolError("search_and_replace") @@ -125,7 +127,9 @@ export class SearchAndReplaceTool extends BaseTool<"search_and_replace"> { const errors: string[] = [] for (let i = 0; i < operations.length; i++) { - const { search, replace } = operations[i] + // Normalize line endings in search/replace strings to match file content + const search = operations[i].search.replace(/\r\n/g, "\n") + const replace = operations[i].replace.replace(/\r\n/g, "\n") const searchPattern = new RegExp(escapeRegExp(search), "g") const matchCount = newContent.match(searchPattern)?.length ?? 
0 diff --git a/src/core/tools/SearchReplaceTool.ts b/src/core/tools/SearchReplaceTool.ts index b0150d04a17..dadb97fde5a 100644 --- a/src/core/tools/SearchReplaceTool.ts +++ b/src/core/tools/SearchReplaceTool.ts @@ -105,6 +105,8 @@ export class SearchReplaceTool extends BaseTool<"search_replace"> { let fileContent: string try { fileContent = await fs.readFile(absolutePath, "utf8") + // Normalize line endings to LF for consistent matching + fileContent = fileContent.replace(/\r\n/g, "\n") } catch (error) { task.consecutiveMistakeCount++ task.recordToolError("search_replace") @@ -114,8 +116,12 @@ export class SearchReplaceTool extends BaseTool<"search_replace"> { return } + // Normalize line endings in search/replace strings to match file content + const normalizedOldString = old_string.replace(/\r\n/g, "\n") + const normalizedNewString = new_string.replace(/\r\n/g, "\n") + // Check for exact match (literal string, not regex) - const matchCount = fileContent.split(old_string).length - 1 + const matchCount = fileContent.split(normalizedOldString).length - 1 if (matchCount === 0) { task.consecutiveMistakeCount++ @@ -142,7 +148,7 @@ export class SearchReplaceTool extends BaseTool<"search_replace"> { } // Apply the single replacement - const newContent = fileContent.replace(old_string, new_string) + const newContent = fileContent.replace(normalizedOldString, normalizedNewString) // Check if any changes were made if (newContent === fileContent) { diff --git a/src/core/tools/__tests__/applyDiffTool.experiment.spec.ts b/src/core/tools/__tests__/applyDiffTool.experiment.spec.ts index 3bd3a4ad64e..f876cf92112 100644 --- a/src/core/tools/__tests__/applyDiffTool.experiment.spec.ts +++ b/src/core/tools/__tests__/applyDiffTool.experiment.spec.ts @@ -123,7 +123,7 @@ describe("applyDiffTool experiment routing", () => { mockRemoveClosingTag = vi.fn((tag, value) => value) }) - it("should use legacy tool when MULTI_FILE_APPLY_DIFF experiment is disabled", async () => { + it("should 
always use class-based tool with native protocol (XML deprecated)", async () => { mockProvider.getState.mockResolvedValue({ experiments: { [EXPERIMENT_IDS.MULTI_FILE_APPLY_DIFF]: false, @@ -142,16 +142,17 @@ describe("applyDiffTool experiment routing", () => { mockRemoveClosingTag, ) + // Always uses native protocol now (XML deprecated) expect(applyDiffToolClass.handle).toHaveBeenCalledWith(mockCline, mockBlock, { askApproval: mockAskApproval, handleError: mockHandleError, pushToolResult: mockPushToolResult, removeClosingTag: mockRemoveClosingTag, - toolProtocol: "xml", + toolProtocol: "native", }) }) - it("should use legacy tool when experiments are not defined", async () => { + it("should use class-based tool when experiments are not defined", async () => { mockProvider.getState.mockResolvedValue({}) // Mock the class-based tool to resolve successfully @@ -166,24 +167,26 @@ describe("applyDiffTool experiment routing", () => { mockRemoveClosingTag, ) + // Always uses native protocol now (XML deprecated) expect(applyDiffToolClass.handle).toHaveBeenCalledWith(mockCline, mockBlock, { askApproval: mockAskApproval, handleError: mockHandleError, pushToolResult: mockPushToolResult, removeClosingTag: mockRemoveClosingTag, - toolProtocol: "xml", + toolProtocol: "native", }) }) - it("should use multi-file tool when MULTI_FILE_APPLY_DIFF experiment is enabled and using XML protocol", async () => { + it("should use class-based tool when MULTI_FILE_APPLY_DIFF experiment is enabled (native protocol always used)", async () => { mockProvider.getState.mockResolvedValue({ experiments: { [EXPERIMENT_IDS.MULTI_FILE_APPLY_DIFF]: true, }, }) - // Mock the new tool behavior - it should continue with the multi-file implementation - // Since we're not mocking the entire function, we'll just verify it doesn't call the class-based tool + // Mock the class-based tool to resolve successfully + ;(applyDiffToolClass.handle as any).mockResolvedValue(undefined) + await multiApplyDiffTool( 
mockCline, mockBlock, @@ -193,7 +196,14 @@ describe("applyDiffTool experiment routing", () => { mockRemoveClosingTag, ) - expect(applyDiffToolClass.handle).not.toHaveBeenCalled() + // Native protocol is always used now, so class-based tool is always called + expect(applyDiffToolClass.handle).toHaveBeenCalledWith(mockCline, mockBlock, { + askApproval: mockAskApproval, + handleError: mockHandleError, + pushToolResult: mockPushToolResult, + removeClosingTag: mockRemoveClosingTag, + toolProtocol: "native", + }) }) it("should use class-based tool when model defaults to native protocol", async () => { diff --git a/src/core/tools/__tests__/multiApplyDiffTool.spec.ts b/src/core/tools/__tests__/multiApplyDiffTool.spec.ts index 5a9a1af1da3..0910550dd82 100644 --- a/src/core/tools/__tests__/multiApplyDiffTool.spec.ts +++ b/src/core/tools/__tests__/multiApplyDiffTool.spec.ts @@ -1,4 +1,5 @@ import { applyDiffTool } from "../MultiApplyDiffTool" +import { applyDiffTool as applyDiffToolClass } from "../ApplyDiffTool" import { EXPERIMENT_IDS } from "../../../shared/experiments" import * as fs from "fs/promises" import * as fileUtils from "../../../utils/fs" @@ -9,8 +10,12 @@ vi.mock("fs/promises") vi.mock("../../../utils/fs") vi.mock("../../../utils/path") vi.mock("../../../utils/xml") -vi.mock("../applyDiffTool", () => ({ - applyDiffToolLegacy: vi.fn(), + +// Mock the ApplyDiffTool class-based tool that MultiApplyDiffTool delegates to for native protocol +vi.mock("../ApplyDiffTool", () => ({ + applyDiffTool: { + handle: vi.fn().mockResolvedValue(undefined), + }, })) // Mock TelemetryService @@ -116,8 +121,8 @@ describe("multiApplyDiffTool", () => { ;(pathUtils.getReadablePath as any).mockImplementation((cwd: string, path: string) => path) }) - describe("Early content validation", () => { - it("should filter out non-string content at parse time", async () => { + describe("Native protocol delegation", () => { + it("should delegate to applyDiffToolClass.handle for XML args format", 
async () => { mockBlock = { params: { args: ` @@ -130,22 +135,6 @@ describe("multiApplyDiffTool", () => { partial: false, } - // Mock parseXml to return mixed content types - const parseXml = await import("../../../utils/xml") - ;(parseXml.parseXml as any).mockReturnValue({ - file: { - path: "test.ts", - diff: [ - { content: "<<<<<<< SEARCH\ntest\n=======\nreplaced\n>>>>>>> REPLACE" }, - { content: null }, - { content: undefined }, - { content: 42 }, - { content: "" }, // Empty string should also be filtered - { content: "<<<<<<< SEARCH\nmore\n=======\nchanges\n>>>>>>> REPLACE" }, - ], - }, - }) - await applyDiffTool( mockCline, mockBlock, @@ -155,23 +144,26 @@ describe("multiApplyDiffTool", () => { mockRemoveClosingTag, ) - // Should complete without error and only process valid string content - expect(mockPushToolResult).toHaveBeenCalled() - expect(mockHandleError).not.toHaveBeenCalled() - - // Verify that only valid diffs were processed - const resultCall = mockPushToolResult.mock.calls[0][0] - // Should not include the single block notice since we have 2 valid blocks - expect(resultCall).not.toContain("Making multiple related changes") + // Should delegate to the class-based tool + expect(applyDiffToolClass.handle).toHaveBeenCalled() + expect(applyDiffToolClass.handle).toHaveBeenCalledWith( + mockCline, + mockBlock, + expect.objectContaining({ + askApproval: mockAskApproval, + handleError: mockHandleError, + pushToolResult: mockPushToolResult, + removeClosingTag: mockRemoveClosingTag, + toolProtocol: "native", + }), + ) }) - }) - describe("SEARCH block counting with non-string content", () => { - it("should handle diffItem.content being undefined", async () => { + it("should delegate to applyDiffToolClass.handle for legacy path/diff params", async () => { mockBlock = { params: { path: "test.ts", - diff: undefined, // This will result in undefined content + diff: "<<<<<<< SEARCH\nold\n=======\nnew\n>>>>>>> REPLACE", }, partial: false, } @@ -185,35 +177,30 @@ 
describe("multiApplyDiffTool", () => { mockRemoveClosingTag, ) - // Should complete without throwing an error - expect(mockPushToolResult).toHaveBeenCalled() - expect(mockHandleError).not.toHaveBeenCalled() + // Should delegate to the class-based tool + expect(applyDiffToolClass.handle).toHaveBeenCalled() + expect(applyDiffToolClass.handle).toHaveBeenCalledWith( + mockCline, + mockBlock, + expect.objectContaining({ + askApproval: mockAskApproval, + handleError: mockHandleError, + pushToolResult: mockPushToolResult, + removeClosingTag: mockRemoveClosingTag, + toolProtocol: "native", + }), + ) }) - it("should handle diffItem.content being null", async () => { + it("should handle undefined diff content by delegating to class-based tool", async () => { mockBlock = { params: { - args: ` - test.ts - - - - `, + path: "test.ts", + diff: undefined, }, partial: false, } - // Mock parseXml to return null content - const parseXml = await import("../../../utils/xml") - ;(parseXml.parseXml as any).mockReturnValue({ - file: { - path: "test.ts", - diff: { - content: null, - }, - }, - }) - await applyDiffTool( mockCline, mockBlock, @@ -223,35 +210,23 @@ describe("multiApplyDiffTool", () => { mockRemoveClosingTag, ) - // Should complete without throwing an error - expect(mockPushToolResult).toHaveBeenCalled() - expect(mockHandleError).not.toHaveBeenCalled() + // Should delegate to the class-based tool (which will handle the error) + expect(applyDiffToolClass.handle).toHaveBeenCalled() }) - it("should handle diffItem.content being a number", async () => { + it("should handle null diff content by delegating to class-based tool", async () => { mockBlock = { params: { args: ` test.ts - 123 + `, }, partial: false, } - // Mock parseXml to return number content - const parseXml = await import("../../../utils/xml") - ;(parseXml.parseXml as any).mockReturnValue({ - file: { - path: "test.ts", - diff: { - content: 123, // Number instead of string - }, - }, - }) - await applyDiffTool( 
mockCline, mockBlock, @@ -261,12 +236,11 @@ describe("multiApplyDiffTool", () => { mockRemoveClosingTag, ) - // Should complete without throwing an error - expect(mockPushToolResult).toHaveBeenCalled() - expect(mockHandleError).not.toHaveBeenCalled() + // Should delegate to the class-based tool + expect(applyDiffToolClass.handle).toHaveBeenCalled() }) - it("should correctly count SEARCH blocks when content is a valid string", async () => { + it("should delegate multiple SEARCH blocks to class-based tool", async () => { const diffContent = `<<<<<<< SEARCH old content ======= @@ -296,14 +270,11 @@ another new content mockRemoveClosingTag, ) - // Should complete successfully - expect(mockPushToolResult).toHaveBeenCalled() - const resultCall = mockPushToolResult.mock.calls[0][0] - // Should not include the single block notice since we have 2 blocks - expect(resultCall).not.toContain("Making multiple related changes") + // Should delegate to the class-based tool + expect(applyDiffToolClass.handle).toHaveBeenCalled() }) - it("should show single block notice when only one SEARCH block exists", async () => { + it("should delegate single SEARCH block to class-based tool", async () => { const diffContent = `<<<<<<< SEARCH old content ======= @@ -327,16 +298,13 @@ new content mockRemoveClosingTag, ) - // Should complete successfully - expect(mockPushToolResult).toHaveBeenCalled() - const resultCall = mockPushToolResult.mock.calls[0][0] - // Should include the single block notice - expect(resultCall).toContain("Making multiple related changes") + // Should delegate to the class-based tool + expect(applyDiffToolClass.handle).toHaveBeenCalled() }) }) describe("Edge cases for diff content", () => { - it("should handle empty diff array", async () => { + it("should handle empty diff by delegating to class-based tool", async () => { mockBlock = { params: { args: ` @@ -347,14 +315,6 @@ new content partial: false, } - const parseXml = await import("../../../utils/xml") - 
;(parseXml.parseXml as any).mockReturnValue({ - file: { - path: "test.ts", - diff: [], - }, - }) - await applyDiffTool( mockCline, mockBlock, @@ -364,12 +324,12 @@ new content mockRemoveClosingTag, ) - // Should complete without error - expect(mockPushToolResult).toHaveBeenCalled() + // Should delegate to the class-based tool + expect(applyDiffToolClass.handle).toHaveBeenCalled() expect(mockHandleError).not.toHaveBeenCalled() }) - it("should handle mixed content types in diff array", async () => { + it("should handle mixed content types by delegating to class-based tool", async () => { mockBlock = { params: { args: ` @@ -382,20 +342,6 @@ new content partial: false, } - const parseXml = await import("../../../utils/xml") - ;(parseXml.parseXml as any).mockReturnValue({ - file: { - path: "test.ts", - diff: [ - { content: "<<<<<<< SEARCH\ntest\n=======\nreplaced\n>>>>>>> REPLACE" }, - { content: null }, - { content: undefined }, - { content: 42 }, - { content: "<<<<<<< SEARCH\nmore\n=======\nchanges\n>>>>>>> REPLACE" }, - ], - }, - }) - await applyDiffTool( mockCline, mockBlock, @@ -405,8 +351,8 @@ new content mockRemoveClosingTag, ) - // Should complete without error and count only valid string SEARCH blocks - expect(mockPushToolResult).toHaveBeenCalled() + // Should delegate to the class-based tool + expect(applyDiffToolClass.handle).toHaveBeenCalled() expect(mockHandleError).not.toHaveBeenCalled() }) }) diff --git a/src/core/tools/__tests__/readFileTool.spec.ts b/src/core/tools/__tests__/readFileTool.spec.ts index a332cbd586a..d8b53c39cb4 100644 --- a/src/core/tools/__tests__/readFileTool.spec.ts +++ b/src/core/tools/__tests__/readFileTool.spec.ts @@ -384,9 +384,9 @@ describe("read_file tool with maxReadFileLine setting", () => { // Execute const result = await executeReadFileTool({}, { maxReadFileLine: -1 }) - // Verify - just check that the result contains the expected elements - expect(result).toContain(`${testFilePath}`) - expect(result).toContain(``) + // 
Verify - check that the result contains the expected native format elements + expect(result).toContain(`File: ${testFilePath}`) + expect(result).toContain(`Lines 1-5:`) }) it("should not show line snippet in approval message when maxReadFileLine is -1", async () => { @@ -422,17 +422,14 @@ describe("read_file tool with maxReadFileLine setting", () => { }, ) - // Verify - expect(result).toContain(`${testFilePath}`) - expect(result).toContain(``) + // Verify - native format + expect(result).toContain(`File: ${testFilePath}`) + expect(result).toContain(`Code Definitions:`) - // Verify XML structure - expect(result).toContain("Showing only 0 of 5 total lines") - expect(result).toContain("") - expect(result).toContain("") + // Verify native structure + expect(result).toContain("Note: Showing only 0 of 5 total lines") expect(result).toContain(sourceCodeDef.trim()) - expect(result).toContain("") - expect(result).not.toContain(" { // Execute const result = await executeReadFileTool({}, { maxReadFileLine: 3 }) - // Verify - just check that the result contains the expected elements - expect(result).toContain(`${testFilePath}`) - expect(result).toContain(``) - expect(result).toContain(``) - expect(result).toContain("Showing only 3 of 5 total lines") + // Verify - native format + expect(result).toContain(`File: ${testFilePath}`) + expect(result).toContain(`Lines 1-3:`) + expect(result).toContain(`Code Definitions:`) + expect(result).toContain("Note: Showing only 3 of 5 total lines") }) it("should truncate code definitions when file exceeds maxReadFileLine", async () => { @@ -475,17 +472,17 @@ describe("read_file tool with maxReadFileLine setting", () => { // Execute with maxReadFileLine = 30 const result = await executeReadFileTool({}, { maxReadFileLine: 30, totalLines: 100 }) - // Verify that only definitions within the first 30 lines are included - expect(result).toContain(`${testFilePath}`) - expect(result).toContain(``) - expect(result).toContain(``) + // Verify - native 
format + expect(result).toContain(`File: ${testFilePath}`) + expect(result).toContain(`Lines 1-30:`) + expect(result).toContain(`Code Definitions:`) // Should include foo (starts at line 10) but not bar (starts at line 50) or baz (starts at line 80) expect(result).toContain("10--20 | function foo()") expect(result).not.toContain("50--60 | function bar()") expect(result).not.toContain("80--90 | function baz()") - expect(result).toContain("Showing only 30 of 100 total lines") + expect(result).toContain("Note: Showing only 30 of 100 total lines") }) it("should handle truncation when all definitions are beyond the line limit", async () => { @@ -503,10 +500,10 @@ describe("read_file tool with maxReadFileLine setting", () => { // Execute with maxReadFileLine = 30 const result = await executeReadFileTool({}, { maxReadFileLine: 30, totalLines: 100 }) - // Verify that only the header is included (all definitions filtered out) - expect(result).toContain(`${testFilePath}`) - expect(result).toContain(``) - expect(result).toContain(``) + // Verify - native format + expect(result).toContain(`File: ${testFilePath}`) + expect(result).toContain(`Lines 1-30:`) + expect(result).toContain(`Code Definitions:`) expect(result).toContain("# file.txt") expect(result).not.toContain("50--60 | function foo()") expect(result).not.toContain("80--90 | function bar()") @@ -522,9 +519,9 @@ describe("read_file tool with maxReadFileLine setting", () => { // Execute const result = await executeReadFileTool({}, { maxReadFileLine: 10, totalLines: 5 }) - // Verify - just check that the result contains the expected elements - expect(result).toContain(`${testFilePath}`) - expect(result).toContain(``) + // Verify - native format + expect(result).toContain(`File: ${testFilePath}`) + expect(result).toContain(`Lines 1-5:`) }) it("should read with extractTextFromFile when file has few lines", async () => { @@ -544,9 +541,9 @@ describe("read_file tool with maxReadFileLine setting", () => { // Execute const 
result = await executeReadFileTool({}, { maxReadFileLine: 5, totalLines: 3 }) - // Verify - just check that the result contains the expected elements - expect(result).toContain(`${testFilePath}`) - expect(result).toContain(``) + // Verify - native format + expect(result).toContain(`File: ${testFilePath}`) + expect(result).toContain(`Lines 1-3:`) }) }) @@ -560,8 +557,8 @@ describe("read_file tool with maxReadFileLine setting", () => { // Execute const result = await executeReadFileTool({}, { maxReadFileLine: 3, totalLines: 3 }) - // Verify - just check basic structure, the actual binary handling may vary - expect(result).toContain(`${testFilePath}`) + // Verify - native format for binary files + expect(result).toContain(`File: ${testFilePath}`) expect(typeof result).toBe("string") }) }) @@ -580,14 +577,14 @@ describe("read_file tool with maxReadFileLine setting", () => { }, ) - // Verify - just check that the result contains the expected elements - expect(rangeResult).toContain(`${testFilePath}`) - expect(rangeResult).toContain(``) + // Verify - native format + expect(rangeResult).toContain(`File: ${testFilePath}`) + expect(rangeResult).toContain(`Lines 2-4:`) }) }) }) -describe("read_file tool XML output structure", () => { +describe("read_file tool output structure", () => { // Test basic XML structure const testFilePath = "test/file.txt" const absoluteFilePath = "/test/file.txt" @@ -695,8 +692,8 @@ describe("read_file tool XML output structure", () => { return toolResult } - describe("Basic XML Structure Tests", () => { - it("should produce XML output with no unnecessary indentation", async () => { + describe("Basic Structure Tests", () => { + it("should produce native output with proper format", async () => { // Setup const numberedContent = "1 | Line 1\n2 | Line 2\n3 | Line 3\n4 | Line 4\n5 | Line 5" @@ -717,24 +714,19 @@ describe("read_file tool XML output structure", () => { // Execute const result = await executeReadFileTool() - // Verify - 
expect(result).toBe( - `\n${testFilePath}\n\n${numberedContent}\n\n`, - ) + // Verify native format + expect(result).toBe(`File: ${testFilePath}\nLines 1-5:\n${numberedContent}`) }) - it("should follow the correct XML structure format", async () => { + it("should follow the correct native structure format", async () => { // Setup mockInputContent = fileContent // Execute const result = await executeReadFileTool({}, { maxReadFileLine: -1 }) - // Verify using regex to check structure - const xmlStructureRegex = new RegExp( - `^\\n${testFilePath}\\n\\n.*\\n\\n$`, - "s", - ) - expect(result).toMatch(xmlStructureRegex) + // Verify using regex to check native structure + const nativeStructureRegex = new RegExp(`^File: ${testFilePath}\\nLines 1-5:\\n.*$`, "s") + expect(result).toMatch(nativeStructureRegex) }) it("should handle empty files correctly", async () => { @@ -758,10 +750,8 @@ describe("read_file tool XML output structure", () => { // Execute const result = await executeReadFileTool({}, { totalLines: 0 }) - // Verify - expect(result).toBe( - `\n${testFilePath}\nFile is empty\n\n`, - ) + // Verify native format for empty file + expect(result).toBe(`File: ${testFilePath}\nNote: File is empty`) }) describe("Total Image Memory Limit", () => { @@ -1410,7 +1400,7 @@ describe("read_file tool XML output structure", () => { }) describe("Error Handling Tests", () => { - it("should include error tag for invalid path", async () => { + it("should include error in output for invalid path", async () => { // Setup - missing path parameter const toolUse: ReadFileToolUse = { type: "tool_use", @@ -1430,17 +1420,17 @@ describe("read_file tool XML output structure", () => { toolProtocol: "xml", }) - // Verify - expect(toolResult).toBe(`Missing required parameter`) + // Verify - native format for error + expect(toolResult).toBe(`Error: Missing required parameter`) }) - it("should include error tag for RooIgnore error", async () => { + it("should include error for RooIgnore error", 
async () => { // Execute - skip addLineNumbers check as it returns early with an error const result = await executeReadFileTool({}, { validateAccess: false }) - // Verify + // Verify - native format for error expect(result).toBe( - `\n${testFilePath}Access to ${testFilePath} is blocked by the .rooignore file settings. You must try to continue in the task without using this file, or ask the user to update the .rooignore file.\n`, + `File: ${testFilePath}\nError: Access to ${testFilePath} is blocked by the .rooignore file settings. You must try to continue in the task without using this file, or ask the user to update the .rooignore file.`, ) }) }) @@ -1552,10 +1542,10 @@ describe("read_file tool with image support", () => { const textPart = (result as any[]).find((p) => p.type === "text")?.text const imagePart = (result as any[]).find((p) => p.type === "image") - // Verify text part - expect(textPart).toContain(`${imagePath}`) + // Verify text part - native format + expect(textPart).toContain(`File: ${imagePath}`) expect(textPart).not.toContain("") - expect(textPart).toContain(`Image file`) + expect(textPart).toContain(`Note: Image file`) // Verify image part expect(imagePart).toBeDefined() @@ -1574,10 +1564,10 @@ describe("read_file tool with image support", () => { const textPart = (result as any[]).find((p) => p.type === "text")?.text const imagePart = (result as any[]).find((p) => p.type === "image") - // Verify text part - expect(textPart).toContain(`${testImagePath}`) + // Verify text part - native format + expect(textPart).toContain(`File: ${testImagePath}`) expect(textPart).not.toContain(``) - expect(textPart).toContain(`Image file`) + expect(textPart).toContain(`Note: Image file`) // Verify image part expect(imagePart).toBeDefined() @@ -1595,7 +1585,8 @@ describe("read_file tool with image support", () => { const textArg = callArgs[0] const imagesArg = callArgs[1] - expect(textArg).toContain(`${testImagePath}`) + // Native format + 
expect(textArg).toContain(`File: ${testImagePath}`) expect(imagesArg).toBeDefined() expect(imagesArg).toBeInstanceOf(Array) expect(imagesArg!.length).toBe(1) @@ -1626,11 +1617,12 @@ describe("read_file tool with image support", () => { // Execute const result = await executeReadImageTool() - // When images are not supported, the tool should return just XML (not call formatResponse.toolResult) + // When images are not supported, the tool should return just text (not call formatResponse.toolResult) expect(toolResultMock).not.toHaveBeenCalled() expect(typeof result).toBe("string") - expect(result).toContain(`${testImagePath}`) - expect(result).toContain(`Image file`) + // Native format + expect(result).toContain(`File: ${testImagePath}`) + expect(result).toContain(`Note: Image file`) }) it("should include images when model supports images", async () => { @@ -1646,7 +1638,8 @@ describe("read_file tool with image support", () => { const textArg = callArgs[0] const imagesArg = callArgs[1] - expect(textArg).toContain(`${testImagePath}`) + // Native format + expect(textArg).toContain(`File: ${testImagePath}`) expect(imagesArg).toBeDefined() // Images should be included expect(imagesArg).toBeInstanceOf(Array) expect(imagesArg!.length).toBe(1) @@ -1660,11 +1653,12 @@ describe("read_file tool with image support", () => { // Execute const result = await executeReadImageTool() - // When supportsImages is undefined, should default to false and return just XML + // When supportsImages is undefined, should default to false and return just text expect(toolResultMock).not.toHaveBeenCalled() expect(typeof result).toBe("string") - expect(result).toContain(`${testImagePath}`) - expect(result).toContain(`Image file`) + // Native format + expect(result).toContain(`File: ${testImagePath}`) + expect(result).toContain(`Note: Image file`) }) it("should handle errors when reading image files", async () => { @@ -1690,8 +1684,8 @@ describe("read_file tool with image support", () => { 
toolProtocol: "xml", }) - // Verify error handling - expect(toolResult).toContain("Error reading image file: Failed to read image") + // Verify error handling - native format + expect(toolResult).toContain("Error: Error reading image file: Failed to read image") // Verify that say was called to show error to user expect(localMockCline.say).toHaveBeenCalledWith("error", expect.stringContaining("Failed to read image")) }) @@ -1726,9 +1720,9 @@ describe("read_file tool with image support", () => { // Execute const result = await executeReadImageTool(binaryPath) - // Verify + // Verify - native format for binary files expect(result).not.toContain("") - expect(result).toContain(' ({ + default: { + readFile: vi.fn().mockResolvedValue(""), + }, +})) + +vi.mock("path", async () => { + const originalPath = await vi.importActual("path") + return { + ...originalPath, + resolve: vi.fn().mockImplementation((...args) => { + const separator = process.platform === "win32" ? "\\" : "/" + return args.join(separator) + }), + isAbsolute: vi.fn().mockReturnValue(false), + relative: vi.fn().mockImplementation((from, to) => to), + } +}) + +vi.mock("delay", () => ({ + default: vi.fn(), +})) + +vi.mock("../../../utils/fs", () => ({ + fileExistsAtPath: vi.fn().mockResolvedValue(true), +})) + +vi.mock("../../prompts/responses", () => ({ + formatResponse: { + toolError: vi.fn((msg) => `Error: ${msg}`), + rooIgnoreError: vi.fn((path) => `Access denied: ${path}`), + createPrettyPatch: vi.fn(() => "mock-diff"), + }, +})) + +vi.mock("../../../utils/pathUtils", () => ({ + isPathOutsideWorkspace: vi.fn().mockReturnValue(false), +})) + +vi.mock("../../../utils/path", () => ({ + getReadablePath: vi.fn().mockReturnValue("test/path.txt"), +})) + +vi.mock("../../diff/stats", () => ({ + sanitizeUnifiedDiff: vi.fn((diff) => diff), + computeDiffStats: vi.fn(() => ({ additions: 1, deletions: 1 })), +})) + +vi.mock("vscode", () => ({ + window: { + showWarningMessage: vi.fn().mockResolvedValue(undefined), + 
}, + env: { + openExternal: vi.fn(), + }, + Uri: { + parse: vi.fn(), + }, +})) + +describe("searchAndReplaceTool", () => { + // Test data + const testFilePath = "test/file.txt" + const absoluteFilePath = process.platform === "win32" ? "C:\\test\\file.txt" : "/test/file.txt" + const testFileContent = "Line 1\nLine 2\nLine 3\nLine 4" + + // Mocked functions + const mockedFileExistsAtPath = fileExistsAtPath as MockedFunction + const mockedFsReadFile = fs.readFile as unknown as MockedFunction< + (path: string, encoding: string) => Promise + > + const mockedIsPathOutsideWorkspace = isPathOutsideWorkspace as MockedFunction + const mockedGetReadablePath = getReadablePath as MockedFunction + const mockedPathResolve = path.resolve as MockedFunction + const mockedPathIsAbsolute = path.isAbsolute as MockedFunction + + const mockTask: any = {} + let mockAskApproval: ReturnType + let mockHandleError: ReturnType + let mockPushToolResult: ReturnType + let mockRemoveClosingTag: ReturnType + let toolResult: ToolResponse | undefined + + beforeEach(() => { + vi.clearAllMocks() + + mockedPathResolve.mockReturnValue(absoluteFilePath) + mockedPathIsAbsolute.mockReturnValue(false) + mockedFileExistsAtPath.mockResolvedValue(true) + mockedFsReadFile.mockResolvedValue(testFileContent) + mockedIsPathOutsideWorkspace.mockReturnValue(false) + mockedGetReadablePath.mockReturnValue("test/path.txt") + + mockTask.cwd = "/" + mockTask.consecutiveMistakeCount = 0 + mockTask.didEditFile = false + mockTask.providerRef = { + deref: vi.fn().mockReturnValue({ + getState: vi.fn().mockResolvedValue({ + diagnosticsEnabled: true, + writeDelayMs: 1000, + experiments: {}, + }), + }), + } + mockTask.rooIgnoreController = { + validateAccess: vi.fn().mockReturnValue(true), + } + mockTask.rooProtectedController = { + isWriteProtected: vi.fn().mockReturnValue(false), + } + mockTask.diffViewProvider = { + editType: undefined, + isEditing: false, + originalContent: "", + open: vi.fn().mockResolvedValue(undefined), + 
update: vi.fn().mockResolvedValue(undefined), + reset: vi.fn().mockResolvedValue(undefined), + revertChanges: vi.fn().mockResolvedValue(undefined), + saveChanges: vi.fn().mockResolvedValue({ + newProblemsMessage: "", + userEdits: null, + finalContent: "final content", + }), + saveDirectly: vi.fn().mockResolvedValue(undefined), + scrollToFirstDiff: vi.fn(), + pushToolWriteResult: vi.fn().mockResolvedValue("Tool result message"), + } + mockTask.fileContextTracker = { + trackFileContext: vi.fn().mockResolvedValue(undefined), + } + mockTask.say = vi.fn().mockResolvedValue(undefined) + mockTask.ask = vi.fn().mockResolvedValue(undefined) + mockTask.recordToolError = vi.fn() + mockTask.recordToolUsage = vi.fn() + mockTask.processQueuedMessages = vi.fn() + mockTask.sayAndCreateMissingParamError = vi.fn().mockResolvedValue("Missing param error") + + mockAskApproval = vi.fn().mockResolvedValue(true) + mockHandleError = vi.fn().mockResolvedValue(undefined) + mockRemoveClosingTag = vi.fn((tag, content) => content) + + toolResult = undefined + }) + + /** + * Helper function to execute the search and replace tool with different parameters + */ + async function executeSearchAndReplaceTool( + params: Partial = {}, + options: { + fileExists?: boolean + fileContent?: string + isPartial?: boolean + accessAllowed?: boolean + } = {}, + ): Promise { + const fileExists = options.fileExists ?? true + const fileContent = options.fileContent ?? testFileContent + const isPartial = options.isPartial ?? false + const accessAllowed = options.accessAllowed ?? 
true + + mockedFileExistsAtPath.mockResolvedValue(fileExists) + mockedFsReadFile.mockResolvedValue(fileContent) + mockTask.rooIgnoreController.validateAccess.mockReturnValue(accessAllowed) + + const toolUse: ToolUse = { + type: "tool_use", + name: "search_and_replace", + params: { + path: testFilePath, + operations: JSON.stringify([{ search: "Line 2", replace: "Modified Line 2" }]), + ...params, + }, + partial: isPartial, + } + + mockPushToolResult = vi.fn((result: ToolResponse) => { + toolResult = result + }) + + await searchAndReplaceTool.handle(mockTask, toolUse as ToolUse<"search_and_replace">, { + askApproval: mockAskApproval, + handleError: mockHandleError, + pushToolResult: mockPushToolResult, + removeClosingTag: mockRemoveClosingTag, + toolProtocol: "native", + }) + + return toolResult + } + + describe("parameter validation", () => { + it("returns error when path is missing", async () => { + const result = await executeSearchAndReplaceTool({ path: undefined }) + + expect(result).toBe("Missing param error") + expect(mockTask.consecutiveMistakeCount).toBe(1) + expect(mockTask.recordToolError).toHaveBeenCalledWith("search_and_replace") + }) + + it("returns error when operations is missing", async () => { + const result = await executeSearchAndReplaceTool({ operations: undefined }) + + expect(result).toContain("Error:") + expect(result).toContain("Missing or empty 'operations' parameter") + expect(mockTask.consecutiveMistakeCount).toBe(1) + }) + + it("returns error when operations is empty array", async () => { + const result = await executeSearchAndReplaceTool({ operations: JSON.stringify([]) }) + + expect(result).toContain("Error:") + expect(result).toContain("Missing or empty 'operations' parameter") + expect(mockTask.consecutiveMistakeCount).toBe(1) + }) + }) + + describe("file access", () => { + it("returns error when file does not exist", async () => { + const result = await executeSearchAndReplaceTool({}, { fileExists: false }) + + 
expect(result).toContain("Error:") + expect(result).toContain("File not found") + expect(mockTask.consecutiveMistakeCount).toBe(1) + }) + + it("returns error when access is denied", async () => { + const result = await executeSearchAndReplaceTool({}, { accessAllowed: false }) + + expect(result).toContain("Access denied") + }) + }) + + describe("search and replace logic", () => { + it("returns error when no match is found", async () => { + const result = await executeSearchAndReplaceTool( + { operations: JSON.stringify([{ search: "NonExistent", replace: "New" }]) }, + { fileContent: "Line 1\nLine 2\nLine 3" }, + ) + + expect(result).toContain("Error:") + expect(result).toContain("No match found") + expect(mockTask.consecutiveMistakeCount).toBe(1) + expect(mockTask.recordToolError).toHaveBeenCalledWith("search_and_replace", "no_match") + }) + + it("returns error when multiple matches are found", async () => { + const result = await executeSearchAndReplaceTool( + { operations: JSON.stringify([{ search: "Line", replace: "Row" }]) }, + { fileContent: "Line 1\nLine 2\nLine 3" }, + ) + + expect(result).toContain("Error:") + expect(result).toContain("3 matches") + expect(mockTask.consecutiveMistakeCount).toBe(1) + }) + + it("successfully replaces single unique match", async () => { + await executeSearchAndReplaceTool( + { operations: JSON.stringify([{ search: "Line 2", replace: "Modified Line 2" }]) }, + { fileContent: "Line 1\nLine 2\nLine 3" }, + ) + + expect(mockTask.consecutiveMistakeCount).toBe(0) + expect(mockTask.diffViewProvider.editType).toBe("modify") + expect(mockAskApproval).toHaveBeenCalled() + }) + }) + + describe("CRLF normalization", () => { + it("normalizes CRLF to LF when reading file", async () => { + const contentWithCRLF = "Line 1\r\nLine 2\r\nLine 3" + + await executeSearchAndReplaceTool( + { operations: JSON.stringify([{ search: "Line 2", replace: "Modified Line 2" }]) }, + { fileContent: contentWithCRLF }, + ) + + 
expect(mockTask.consecutiveMistakeCount).toBe(0) + expect(mockAskApproval).toHaveBeenCalled() + }) + + it("normalizes CRLF in search string to match LF-normalized file content", async () => { + // File has CRLF line endings + const contentWithCRLF = "Line 1\r\nLine 2\r\nLine 3" + // Search string also has CRLF (simulating what the model might send) + const searchWithCRLF = "Line 1\r\nLine 2" + + await executeSearchAndReplaceTool( + { operations: JSON.stringify([{ search: searchWithCRLF, replace: "Modified Lines" }]) }, + { fileContent: contentWithCRLF }, + ) + + expect(mockTask.consecutiveMistakeCount).toBe(0) + expect(mockAskApproval).toHaveBeenCalled() + }) + + it("matches LF search string against CRLF file content after normalization", async () => { + // File has CRLF line endings + const contentWithCRLF = "Line 1\r\nLine 2\r\nLine 3" + // Search string has LF (typical model output) + const searchWithLF = "Line 1\nLine 2" + + await executeSearchAndReplaceTool( + { operations: JSON.stringify([{ search: searchWithLF, replace: "Modified Lines" }]) }, + { fileContent: contentWithCRLF }, + ) + + expect(mockTask.consecutiveMistakeCount).toBe(0) + expect(mockAskApproval).toHaveBeenCalled() + }) + }) + + describe("approval workflow", () => { + it("saves changes when user approves", async () => { + mockAskApproval.mockResolvedValue(true) + + await executeSearchAndReplaceTool() + + expect(mockTask.diffViewProvider.saveChanges).toHaveBeenCalled() + expect(mockTask.didEditFile).toBe(true) + expect(mockTask.recordToolUsage).toHaveBeenCalledWith("search_and_replace") + }) + + it("reverts changes when user rejects", async () => { + mockAskApproval.mockResolvedValue(false) + + const result = await executeSearchAndReplaceTool() + + expect(mockTask.diffViewProvider.revertChanges).toHaveBeenCalled() + expect(mockTask.diffViewProvider.saveChanges).not.toHaveBeenCalled() + expect(result).toContain("rejected") + }) + }) + + describe("partial block handling", () => { + it("handles 
partial block without errors", async () => { + await executeSearchAndReplaceTool({}, { isPartial: true }) + + expect(mockTask.ask).toHaveBeenCalled() + }) + }) + + describe("error handling", () => { + it("handles file read errors gracefully", async () => { + mockedFsReadFile.mockRejectedValueOnce(new Error("Read failed")) + + const toolUse: ToolUse = { + type: "tool_use", + name: "search_and_replace", + params: { + path: testFilePath, + operations: JSON.stringify([{ search: "Line 2", replace: "Modified" }]), + }, + partial: false, + } + + let capturedResult: ToolResponse | undefined + const localPushToolResult = vi.fn((result: ToolResponse) => { + capturedResult = result + }) + + await searchAndReplaceTool.handle(mockTask, toolUse as ToolUse<"search_and_replace">, { + askApproval: mockAskApproval, + handleError: mockHandleError, + pushToolResult: localPushToolResult, + removeClosingTag: mockRemoveClosingTag, + toolProtocol: "native", + }) + + expect(capturedResult).toContain("Error:") + expect(capturedResult).toContain("Failed to read file") + expect(mockTask.consecutiveMistakeCount).toBe(1) + }) + + it("handles general errors and resets diff view", async () => { + mockTask.diffViewProvider.open.mockRejectedValueOnce(new Error("General error")) + + await executeSearchAndReplaceTool() + + expect(mockHandleError).toHaveBeenCalledWith("search and replace", expect.any(Error)) + expect(mockTask.diffViewProvider.reset).toHaveBeenCalled() + }) + }) + + describe("file tracking", () => { + it("tracks file context after successful edit", async () => { + await executeSearchAndReplaceTool() + + expect(mockTask.fileContextTracker.trackFileContext).toHaveBeenCalledWith(testFilePath, "roo_edited") + }) + }) +}) diff --git a/src/core/tools/__tests__/searchReplaceTool.spec.ts b/src/core/tools/__tests__/searchReplaceTool.spec.ts index b9b1e453d4a..984808e9715 100644 --- a/src/core/tools/__tests__/searchReplaceTool.spec.ts +++ b/src/core/tools/__tests__/searchReplaceTool.spec.ts @@ 
-379,4 +379,48 @@ describe("searchReplaceTool", () => { expect(mockCline.fileContextTracker.trackFileContext).toHaveBeenCalledWith(testFilePath, "roo_edited") }) }) + + describe("CRLF normalization", () => { + it("normalizes CRLF to LF when reading file", async () => { + const contentWithCRLF = "Line 1\r\nLine 2\r\nLine 3" + + await executeSearchReplaceTool( + { old_string: "Line 2", new_string: "Modified Line 2" }, + { fileContent: contentWithCRLF }, + ) + + expect(mockCline.consecutiveMistakeCount).toBe(0) + expect(mockAskApproval).toHaveBeenCalled() + }) + + it("normalizes CRLF in old_string to match LF-normalized file content", async () => { + // File has CRLF line endings + const contentWithCRLF = "Line 1\r\nLine 2\r\nLine 3" + // Search string also has CRLF (simulating what the model might send) + const searchWithCRLF = "Line 1\r\nLine 2" + + await executeSearchReplaceTool( + { old_string: searchWithCRLF, new_string: "Modified Lines" }, + { fileContent: contentWithCRLF }, + ) + + expect(mockCline.consecutiveMistakeCount).toBe(0) + expect(mockAskApproval).toHaveBeenCalled() + }) + + it("matches LF old_string against CRLF file content after normalization", async () => { + // File has CRLF line endings + const contentWithCRLF = "Line 1\r\nLine 2\r\nLine 3" + // Search string has LF (typical model output) + const searchWithLF = "Line 1\nLine 2" + + await executeSearchReplaceTool( + { old_string: searchWithLF, new_string: "Modified Lines" }, + { fileContent: contentWithCRLF }, + ) + + expect(mockCline.consecutiveMistakeCount).toBe(0) + expect(mockAskApproval).toHaveBeenCalled() + }) + }) }) diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index dc847a77088..7da27f095bc 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -168,7 +168,7 @@ export class ClineProvider public isViewLaunched = false public settingsImportedAt?: number - public readonly latestAnnouncementId = 
"dec-2025-v3.36.0-context-rewind-roo-provider" // v3.36.0 Context Rewind & Roo Provider Improvements + public readonly latestAnnouncementId = "dec-2025-v3.37.0-minimax-m21-glm47-custom-tools" // v3.37.0 MiniMax M2.1, GLM-4.7, & Experimental Custom Tools public readonly providerSettingsManager: ProviderSettingsManager public readonly customModesManager: CustomModesManager diff --git a/src/utils/__tests__/resolveToolProtocol.spec.ts b/src/utils/__tests__/resolveToolProtocol.spec.ts index 929d2fa3236..513a7eaa35e 100644 --- a/src/utils/__tests__/resolveToolProtocol.spec.ts +++ b/src/utils/__tests__/resolveToolProtocol.spec.ts @@ -5,269 +5,73 @@ import type { ProviderSettings, ModelInfo } from "@roo-code/types" import type { Anthropic } from "@anthropic-ai/sdk" describe("resolveToolProtocol", () => { - describe("Precedence Level 1: User Profile Setting", () => { - it("should use profile toolProtocol when explicitly set to xml", () => { + /** + * XML Protocol Deprecation: + * + * XML tool protocol has been fully deprecated. All models now use Native + * tool calling. User preferences and model defaults are ignored. + * + * Precedence: + * 1. Locked Protocol (for resumed tasks that used XML) + * 2. 
Native (always, for all new tasks) + */ + + describe("Locked Protocol (Precedence Level 0 - Highest Priority)", () => { + it("should return lockedProtocol when provided", () => { const settings: ProviderSettings = { - toolProtocol: "xml", - apiProvider: "anthropic", - } - const result = resolveToolProtocol(settings) - expect(result).toBe(TOOL_PROTOCOL.XML) - }) - - it("should use profile toolProtocol when explicitly set to native", () => { - const settings: ProviderSettings = { - toolProtocol: "native", - apiProvider: "anthropic", - } - const modelInfo: ModelInfo = { - maxTokens: 4096, - contextWindow: 128000, - supportsPromptCache: false, - supportsNativeTools: true, // Model supports native tools - } - const result = resolveToolProtocol(settings, modelInfo) - expect(result).toBe(TOOL_PROTOCOL.NATIVE) - }) - - it("should override model default when profile setting is present", () => { - const settings: ProviderSettings = { - toolProtocol: "xml", + toolProtocol: "xml", // Ignored apiProvider: "openai-native", } - const modelInfo: ModelInfo = { - maxTokens: 4096, - contextWindow: 128000, - supportsPromptCache: false, - defaultToolProtocol: "native", - supportsNativeTools: true, - } - const result = resolveToolProtocol(settings, modelInfo) - expect(result).toBe(TOOL_PROTOCOL.XML) // Profile setting wins - }) - - it("should override model capability when profile setting is present", () => { - const settings: ProviderSettings = { - toolProtocol: "xml", - apiProvider: "openai-native", - } - const modelInfo: ModelInfo = { - maxTokens: 4096, - contextWindow: 128000, - supportsPromptCache: false, - supportsNativeTools: true, - } - const result = resolveToolProtocol(settings, modelInfo) - expect(result).toBe(TOOL_PROTOCOL.XML) // Profile setting wins - }) - }) - - describe("Precedence Level 2: Model Default", () => { - it("should use model defaultToolProtocol when no profile setting", () => { - const settings: ProviderSettings = { - apiProvider: "roo", - } - const 
modelInfo: ModelInfo = { - maxTokens: 4096, - contextWindow: 128000, - supportsPromptCache: false, - defaultToolProtocol: "native", - supportsNativeTools: true, // Model must support native tools - } - const result = resolveToolProtocol(settings, modelInfo) - expect(result).toBe(TOOL_PROTOCOL.NATIVE) // Model default wins when experiment is disabled - }) - - it("should override model capability when model default is present", () => { - const settings: ProviderSettings = { - apiProvider: "roo", - } - const modelInfo: ModelInfo = { - maxTokens: 4096, - contextWindow: 128000, - supportsPromptCache: false, - defaultToolProtocol: "xml", - supportsNativeTools: true, - } - const result = resolveToolProtocol(settings, modelInfo) - expect(result).toBe(TOOL_PROTOCOL.XML) // Model default wins over capability + // lockedProtocol overrides everything + const result = resolveToolProtocol(settings, undefined, "native") + expect(result).toBe(TOOL_PROTOCOL.NATIVE) }) - }) - describe("Support Validation", () => { - it("should fall back to XML when model doesn't support native", () => { + it("should return XML lockedProtocol for resumed tasks that used XML", () => { const settings: ProviderSettings = { + toolProtocol: "native", // Ignored apiProvider: "anthropic", } - const modelInfo: ModelInfo = { - maxTokens: 4096, - contextWindow: 128000, - supportsPromptCache: false, - supportsNativeTools: false, - } - const result = resolveToolProtocol(settings, modelInfo) + // lockedProtocol forces XML for backward compatibility + const result = resolveToolProtocol(settings, undefined, "xml") expect(result).toBe(TOOL_PROTOCOL.XML) }) - it("should fall back to XML when user prefers native but model doesn't support it", () => { + it("should fall through to Native when lockedProtocol is undefined", () => { const settings: ProviderSettings = { - toolProtocol: "native", // User wants native + toolProtocol: "xml", // Ignored apiProvider: "anthropic", } - const modelInfo: ModelInfo = { - maxTokens: 
4096, - contextWindow: 128000, - supportsPromptCache: false, - supportsNativeTools: false, // But model doesn't support it - } - const result = resolveToolProtocol(settings, modelInfo) - expect(result).toBe(TOOL_PROTOCOL.XML) // Falls back to XML due to lack of support - }) - - it("should fall back to XML when user prefers native but model support is undefined", () => { - const settings: ProviderSettings = { - toolProtocol: "native", // User wants native - apiProvider: "anthropic", - } - const modelInfo: ModelInfo = { - maxTokens: 4096, - contextWindow: 128000, - supportsPromptCache: false, - // supportsNativeTools is undefined (not specified) - } - const result = resolveToolProtocol(settings, modelInfo) - expect(result).toBe(TOOL_PROTOCOL.XML) // Falls back to XML - undefined treated as unsupported - }) - }) - - describe("Precedence Level 3: Native Fallback", () => { - it("should use Native fallback when no model default is specified and model supports native", () => { - const settings: ProviderSettings = { - apiProvider: "anthropic", - } - const modelInfo: ModelInfo = { - maxTokens: 4096, - contextWindow: 128000, - supportsPromptCache: false, - supportsNativeTools: true, - } - const result = resolveToolProtocol(settings, modelInfo) - expect(result).toBe(TOOL_PROTOCOL.XML) // XML fallback + // undefined lockedProtocol should return native + const result = resolveToolProtocol(settings, undefined, undefined) + expect(result).toBe(TOOL_PROTOCOL.NATIVE) }) }) - describe("Complete Precedence Chain", () => { - it("should respect full precedence: Profile > Model Default > Native Fallback", () => { - // Set up a scenario with all levels defined - const settings: ProviderSettings = { - toolProtocol: "native", // Level 1: User profile setting - apiProvider: "roo", - } - - const modelInfo: ModelInfo = { - maxTokens: 4096, - contextWindow: 128000, - supportsPromptCache: false, - defaultToolProtocol: "xml", // Level 2: Model default - supportsNativeTools: true, // Support 
check - } - - const result = resolveToolProtocol(settings, modelInfo) - expect(result).toBe(TOOL_PROTOCOL.NATIVE) // Profile setting wins - }) - - it("should skip to model default when profile setting is undefined", () => { - const settings: ProviderSettings = { - apiProvider: "openai-native", - } - - const modelInfo: ModelInfo = { - maxTokens: 4096, - contextWindow: 128000, - supportsPromptCache: false, - defaultToolProtocol: "xml", // Level 2 - supportsNativeTools: true, // Support check - } - - const result = resolveToolProtocol(settings, modelInfo) - expect(result).toBe(TOOL_PROTOCOL.XML) // Model default wins - }) - - it("should skip to Native fallback when profile and model default are undefined", () => { - const settings: ProviderSettings = { - apiProvider: "openai-native", - } - - const modelInfo: ModelInfo = { - maxTokens: 4096, - contextWindow: 128000, - supportsPromptCache: false, - supportsNativeTools: true, - } - - const result = resolveToolProtocol(settings, modelInfo) - expect(result).toBe(TOOL_PROTOCOL.XML) // XML fallback - }) - - it("should skip to XML fallback when model info is unavailable", () => { + describe("Native Protocol Always Used For New Tasks", () => { + it("should always use native for new tasks", () => { const settings: ProviderSettings = { apiProvider: "anthropic", } - - const result = resolveToolProtocol(settings, undefined) - expect(result).toBe(TOOL_PROTOCOL.XML) // XML fallback (no model info means no native support) + const result = resolveToolProtocol(settings) + expect(result).toBe(TOOL_PROTOCOL.NATIVE) }) - }) - describe("Locked Protocol (Precedence Level 0)", () => { - it("should return lockedProtocol when provided, ignoring all other settings", () => { + it("should use native even when user preference is XML (user prefs ignored)", () => { const settings: ProviderSettings = { - toolProtocol: "xml", // User wants XML + toolProtocol: "xml", // User wants XML - ignored apiProvider: "openai-native", } - const modelInfo: 
ModelInfo = { - maxTokens: 4096, - contextWindow: 128000, - supportsPromptCache: false, - supportsNativeTools: true, - defaultToolProtocol: "xml", - } - // lockedProtocol overrides everything - const result = resolveToolProtocol(settings, modelInfo, "native") + const result = resolveToolProtocol(settings) expect(result).toBe(TOOL_PROTOCOL.NATIVE) }) - it("should return XML lockedProtocol even when model supports native", () => { + it("should use native for OpenAI compatible provider", () => { const settings: ProviderSettings = { - toolProtocol: "native", // User wants native - apiProvider: "anthropic", - } - const modelInfo: ModelInfo = { - maxTokens: 4096, - contextWindow: 128000, - supportsPromptCache: false, - supportsNativeTools: true, // Model supports native - defaultToolProtocol: "native", - } - // lockedProtocol forces XML - const result = resolveToolProtocol(settings, modelInfo, "xml") - expect(result).toBe(TOOL_PROTOCOL.XML) - }) - - it("should fall through to normal resolution when lockedProtocol is undefined", () => { - const settings: ProviderSettings = { - toolProtocol: "xml", - apiProvider: "anthropic", - } - const modelInfo: ModelInfo = { - maxTokens: 4096, - contextWindow: 128000, - supportsPromptCache: false, - supportsNativeTools: true, + apiProvider: "openai", } - // undefined lockedProtocol should use normal precedence - const result = resolveToolProtocol(settings, modelInfo, undefined) - expect(result).toBe(TOOL_PROTOCOL.XML) // User setting wins + const result = resolveToolProtocol(settings, openAiModelInfoSaneDefaults) + expect(result).toBe(TOOL_PROTOCOL.NATIVE) }) }) @@ -275,7 +79,7 @@ describe("resolveToolProtocol", () => { it("should handle missing provider name gracefully", () => { const settings: ProviderSettings = {} const result = resolveToolProtocol(settings) - expect(result).toBe(TOOL_PROTOCOL.XML) // Falls back to XML (no model info) + expect(result).toBe(TOOL_PROTOCOL.NATIVE) // Always native now }) it("should handle undefined 
model info gracefully", () => { @@ -283,26 +87,18 @@ describe("resolveToolProtocol", () => { apiProvider: "openai-native", } const result = resolveToolProtocol(settings, undefined) - expect(result).toBe(TOOL_PROTOCOL.XML) // XML fallback (no model info) + expect(result).toBe(TOOL_PROTOCOL.NATIVE) // Always native now }) - it("should fall back to XML when model doesn't support native", () => { - const settings: ProviderSettings = { - apiProvider: "roo", - } - const modelInfo: ModelInfo = { - maxTokens: 4096, - contextWindow: 128000, - supportsPromptCache: false, - supportsNativeTools: false, // Model doesn't support native - } - const result = resolveToolProtocol(settings, modelInfo) - expect(result).toBe(TOOL_PROTOCOL.XML) // Falls back to XML due to lack of support + it("should handle empty settings", () => { + const settings: ProviderSettings = {} + const result = resolveToolProtocol(settings) + expect(result).toBe(TOOL_PROTOCOL.NATIVE) // Always native now }) }) describe("Real-world Scenarios", () => { - it("should use Native fallback for models without defaultToolProtocol", () => { + it("should use Native for OpenAI models", () => { const settings: ProviderSettings = { apiProvider: "openai-native", } @@ -313,10 +109,10 @@ describe("resolveToolProtocol", () => { supportsNativeTools: true, } const result = resolveToolProtocol(settings, modelInfo) - expect(result).toBe(TOOL_PROTOCOL.XML) // XML fallback + expect(result).toBe(TOOL_PROTOCOL.NATIVE) }) - it("should use XML for Claude models with Anthropic provider", () => { + it("should use Native for Claude models", () => { const settings: ProviderSettings = { apiProvider: "anthropic", } @@ -324,65 +120,39 @@ describe("resolveToolProtocol", () => { maxTokens: 8192, contextWindow: 200000, supportsPromptCache: true, - supportsNativeTools: false, - } - const result = resolveToolProtocol(settings, modelInfo) - expect(result).toBe(TOOL_PROTOCOL.XML) - }) - - it("should allow user to force XML on native-supporting model", 
() => { - const settings: ProviderSettings = { - toolProtocol: "xml", // User explicitly wants XML - apiProvider: "openai-native", - } - const modelInfo: ModelInfo = { - maxTokens: 4096, - contextWindow: 128000, - supportsPromptCache: false, - supportsNativeTools: true, // Model supports native but user wants XML - defaultToolProtocol: "native", + supportsNativeTools: true, } const result = resolveToolProtocol(settings, modelInfo) - expect(result).toBe(TOOL_PROTOCOL.XML) // User preference wins + expect(result).toBe(TOOL_PROTOCOL.NATIVE) }) - it("should not allow user to force native when model doesn't support it", () => { + it("should honor locked protocol for resumed tasks that used XML", () => { const settings: ProviderSettings = { - toolProtocol: "native", // User wants native apiProvider: "anthropic", } - const modelInfo: ModelInfo = { - maxTokens: 4096, - contextWindow: 128000, - supportsPromptCache: false, - supportsNativeTools: false, // Model doesn't support native - } - const result = resolveToolProtocol(settings, modelInfo) - expect(result).toBe(TOOL_PROTOCOL.XML) // Falls back to XML due to lack of support + // Task was started when XML was used, so it's locked to XML + const result = resolveToolProtocol(settings, undefined, "xml") + expect(result).toBe(TOOL_PROTOCOL.XML) }) + }) - it("should use model default when available", () => { + describe("Backward Compatibility - User Preferences Ignored", () => { + it("should ignore user preference for XML", () => { const settings: ProviderSettings = { - apiProvider: "roo", - } - const modelInfo: ModelInfo = { - maxTokens: 8192, - contextWindow: 200000, - supportsPromptCache: true, - defaultToolProtocol: "xml", - supportsNativeTools: true, + toolProtocol: "xml", // User explicitly wants XML - ignored + apiProvider: "openai-native", } - const result = resolveToolProtocol(settings, modelInfo) - expect(result).toBe(TOOL_PROTOCOL.XML) // Model default wins + const result = resolveToolProtocol(settings) + 
expect(result).toBe(TOOL_PROTOCOL.NATIVE) // Native is always used }) - it("should use native tools for OpenAI compatible provider with default model info", () => { + it("should return native regardless of user preference", () => { const settings: ProviderSettings = { - apiProvider: "openai", + toolProtocol: "native", // User preference - ignored but happens to match + apiProvider: "anthropic", } - // Using the actual openAiModelInfoSaneDefaults to verify the fix - const result = resolveToolProtocol(settings, openAiModelInfoSaneDefaults) - expect(result).toBe(TOOL_PROTOCOL.NATIVE) // Should use native tools by default + const result = resolveToolProtocol(settings) + expect(result).toBe(TOOL_PROTOCOL.NATIVE) }) }) }) diff --git a/src/utils/resolveToolProtocol.ts b/src/utils/resolveToolProtocol.ts index 757c2fb96eb..16a6360dff5 100644 --- a/src/utils/resolveToolProtocol.ts +++ b/src/utils/resolveToolProtocol.ts @@ -12,49 +12,57 @@ type ApiMessageForDetection = Anthropic.MessageParam & { } /** - * Resolve the effective tool protocol based on the precedence hierarchy: + * Resolve the effective tool protocol. * - * 0. Locked Protocol (task-level lock, if provided - highest priority) - * 1. User Preference - Per-Profile (explicit profile setting) - * 2. Model Default (defaultToolProtocol in ModelInfo) - * 3. Native Fallback (final fallback) + * **Deprecation Note (XML Protocol):** + * XML tool protocol has been deprecated. All models now use Native tool calling. + * User/profile preferences (`providerSettings.toolProtocol`) and model defaults + * (`modelInfo.defaultToolProtocol`) are ignored. * - * Then check support: if protocol is "native" but model doesn't support it, use XML. + * Precedence: + * 1. Locked Protocol (task-level lock for resumed tasks - highest priority) + * 2. 
Native (always, for all new tasks; legacy "openai"/"zgsm" providers retain the old resolution chain) * - * @param providerSettings - The provider settings for the current profile - * @param modelInfo - Optional model information containing capabilities + * @param _providerSettings - The provider settings (toolProtocol honored only for "openai"/"zgsm" providers) + * @param _modelInfo - Model info; consulted only for "openai"/"zgsm" providers * @param lockedProtocol - Optional task-locked protocol that takes absolute precedence * @returns The resolved tool protocol (either "xml" or "native") */ export function resolveToolProtocol( - providerSettings: ProviderSettings, - modelInfo?: ModelInfo, + _providerSettings: ProviderSettings, + _modelInfo?: ModelInfo & { [key: string]: any }, lockedProtocol?: ToolProtocol, ): ToolProtocol { - // 0. Locked Protocol - task-level lock takes absolute precedence - // This ensures tasks continue using their original protocol even if settings change + // 1. Locked Protocol - task-level lock takes absolute precedence + // This ensures resumed tasks continue using their original protocol if (lockedProtocol) { return lockedProtocol } - // If model doesn't support native tools, return XML immediately - // Treat undefined as unsupported (only allow native when explicitly true) - if (modelInfo?.supportsNativeTools !== true) { - return TOOL_PROTOCOL.XML - } + if (["openai", "zgsm"].includes(_providerSettings.apiProvider || "")) { + // Use Native when the model explicitly declares native-tool support + // Treat undefined as unsupported (only allow native when explicitly true) + if (_modelInfo?.supportsNativeTools === true) { + return TOOL_PROTOCOL.NATIVE + } - // 1. User Preference - Per-Profile (explicit profile setting, highest priority) - if (providerSettings.toolProtocol) { - return providerSettings.toolProtocol - } + // 1. User Preference - Per-Profile (explicit profile setting, highest priority) + if (_providerSettings.toolProtocol) { + return _providerSettings.toolProtocol + } + + // 2. 
Model Default - model's preferred protocol + if (_modelInfo?.defaultToolProtocol) { + return _modelInfo.defaultToolProtocol + } - // 2. Model Default - model's preferred protocol - if (modelInfo?.defaultToolProtocol) { - return modelInfo.defaultToolProtocol + // 3. Fallback: zgsm stays on XML, all other legacy providers use Native + return _providerSettings.apiProvider === "zgsm" ? TOOL_PROTOCOL.XML : TOOL_PROTOCOL.NATIVE } - // 3. Native Fallback - return TOOL_PROTOCOL.XML + // 2. Always return Native protocol for new tasks + // All models now support native tools; XML is deprecated + return TOOL_PROTOCOL.NATIVE } /** diff --git a/webview-ui/src/components/chat/Announcement.tsx b/webview-ui/src/components/chat/Announcement.tsx index 993186bbbbc..f69e18065a6 100644 --- a/webview-ui/src/components/chat/Announcement.tsx +++ b/webview-ui/src/components/chat/Announcement.tsx @@ -44,8 +44,9 @@ const Announcement = ({ hideAnnouncement }: AnnouncementProps) => {

{t("chat:announcement.release.heading")}

    -
  • {t("chat:announcement.release.contextRewind")}
  • -
  • {t("chat:announcement.release.rooProvider")}
  • +
  • {t("chat:announcement.release.minimaxM21")}
  • +
  • {t("chat:announcement.release.glm47")}
  • +
  • {t("chat:announcement.release.customTools")}
diff --git a/webview-ui/src/components/settings/ApiOptions.tsx b/webview-ui/src/components/settings/ApiOptions.tsx index ee8dac85876..5d6d4f72f0c 100644 --- a/webview-ui/src/components/settings/ApiOptions.tsx +++ b/webview-ui/src/components/settings/ApiOptions.tsx @@ -473,15 +473,9 @@ const ApiOptions = ({ // } // }, [selectedProvider]) - // Calculate the default protocol that would be used if toolProtocol is not set - // Mirrors the simplified logic in resolveToolProtocol.ts: - // 1. User preference (toolProtocol) - handled by the select value binding - // 2. Model default - use if available - // 3. Native fallback - const defaultProtocol = selectedModelInfo?.defaultToolProtocol || TOOL_PROTOCOL.XML - - // Show the tool protocol selector when model supports native tools. - // For OpenAI Compatible providers we always show it so users can force XML/native explicitly. + const defaultProtocol = + selectedModelInfo?.defaultToolProtocol || + (selectedProvider === "zgsm" ? TOOL_PROTOCOL.XML : TOOL_PROTOCOL.NATIVE) const showToolProtocolSelector = ["openai", "zgsm"].includes(selectedProvider) || selectedModelInfo?.supportsNativeTools === true // Convert providers to SearchableSelect options diff --git a/webview-ui/src/i18n/locales/en/chat.json b/webview-ui/src/i18n/locales/en/chat.json index 2ad4801de0a..1bcaa210a8a 100644 --- a/webview-ui/src/i18n/locales/en/chat.json +++ b/webview-ui/src/i18n/locales/en/chat.json @@ -339,8 +339,9 @@ }, "release": { "heading": "What's New:", - "contextRewind": "Improved context condensing now lets you restore the full previous context when rewinding to a checkpoint", - "rooProvider": "Roo Code Cloud provider now preserves reasoning content and defaults to native tools for better performance" + "minimaxM21": "Fast and affordable MiniMax M2.1 model now available in Roo Code Cloud, MiniMax, and more", + "glm47": "Z.AI GLM-4.7 model with thinking mode support added to Roo Code Cloud, Z.AI, and more", + "customTools": "Experimental custom 
tool support for defining your own tools in TypeScript" }, "cloudAgents": { "heading": "New in the Cloud:", diff --git a/webview-ui/src/i18n/locales/zh-CN/chat.json b/webview-ui/src/i18n/locales/zh-CN/chat.json index eff3fea7f37..af1eb38ea26 100644 --- a/webview-ui/src/i18n/locales/zh-CN/chat.json +++ b/webview-ui/src/i18n/locales/zh-CN/chat.json @@ -332,9 +332,10 @@ "goToSettingsButton": "前往设置" }, "release": { - "heading": "最新功能:", - "contextRewind": "改进的上下文压缩现在允许您在回到存档点时恢复完整的之前上下文", - "rooProvider": "Roo Code Cloud 提供商现在保留推理内容,并默认使用原生工具以获得更好的性能" + "heading": "新增功能:", + "minimaxM21": "快速且经济高效的 MiniMax M2.1 模型现在在 Roo Code Cloud、MiniMax 等中可用", + "glm47": "支持思考模式的 Z.AI GLM-4.7 模型已添加到 Roo Code Cloud、Z.AI 等", + "customTools": "用于在 TypeScript 中定义自己的工具的实验性自定义工具支持" }, "cloudAgents": { "heading": "云端新功能:", diff --git a/webview-ui/src/i18n/locales/zh-TW/chat.json b/webview-ui/src/i18n/locales/zh-TW/chat.json index b3e5d231046..cbe599f9658 100644 --- a/webview-ui/src/i18n/locales/zh-TW/chat.json +++ b/webview-ui/src/i18n/locales/zh-TW/chat.json @@ -342,9 +342,10 @@ "goToSettingsButton": "前往設定" }, "release": { - "heading": "最新功能:", - "contextRewind": "改進的上下文壓縮現在允許您在回到存檔點時恢復完整的先前上下文", - "rooProvider": "Roo Code Cloud 提供商現在保留推理內容,並預設使用原生工具以獲得更佳效能" + "heading": "新增功能:", + "minimaxM21": "迅速且經濟高效的 MiniMax M2.1 模型現在在 Roo Code Cloud、MiniMax 等中提供", + "glm47": "支援思考模式的 Z.AI GLM-4.7 模型已新增到 Roo Code Cloud、Z.AI 等", + "customTools": "用於在 TypeScript 中定義自己工具的實驗性自訂工具支援" }, "cloudAgents": { "heading": "雲端的新功能:",