From 0e110a2d47d54b3f9222a9c3c8a07d442aaafbde Mon Sep 17 00:00:00 2001 From: NaccOll Date: Thu, 11 Sep 2025 10:10:03 +0800 Subject: [PATCH 1/6] feat: Add support for native tool calls - Implemented applyDiffToolLegacy to handle tool calls with diff content. - Enhanced attemptCompletionTool to push tool result messages when tool calls are enabled. - Updated multiApplyDiffTool to process diffs with search and replace functionality. - Introduced ToolCallSettingsControl component for managing tool call settings in the UI. - Added localization for tool call settings in multiple languages. - Updated settings view to conditionally render tool call options based on provider support. - Refactored utility functions to determine tool call support based on API provider. - Enhanced error handling and user feedback for tool call operations. --- packages/types/src/provider-settings.ts | 1 + src/api/index.ts | 11 +- src/api/providers/__tests__/lmstudio.spec.ts | 111 +++ .../__tests__/openai-tool-call.spec.ts | 201 +++++ .../__tests__/openrouter-tool-call.spec.ts | 161 ++++ .../providers/__tests__/roo-tool-call.spec.ts | 143 ++++ .../base-openai-compatible-provider.ts | 6 + src/api/providers/base-provider.ts | 30 + src/api/providers/lm-studio.ts | 22 +- src/api/providers/openai.ts | 8 + src/api/providers/openrouter.ts | 12 +- src/api/providers/roo.ts | 4 + src/api/transform/stream.ts | 9 + .../AssistantMessageParser.ts | 11 +- .../__tests__/AssistantMessageParser.spec.ts | 87 +++ .../__tests__/parseAssistantMessage.spec.ts | 57 ++ .../parseAssistantMessage.ts | 14 +- .../presentAssistantMessage.ts | 70 +- src/core/config/ProviderSettingsManager.ts | 18 + .../__tests__/ProviderSettingsManager.spec.ts | 1 + src/core/prompts/responses.ts | 17 +- .../__tests__/tool-use-guidelines.spec.ts | 41 +- src/core/prompts/sections/modes.ts | 11 +- .../prompts/sections/tool-use-guidelines.ts | 10 +- src/core/prompts/sections/tool-use.ts | 13 +- src/core/prompts/system.ts | 6 +- 
src/core/prompts/tools/index.ts | 68 +- .../schemas/access-mcp-resource-schema.ts | 44 ++ .../tools/schemas/apply-diff-schema.ts | 366 +++++++++ .../schemas/ask-followup-question-schema.ts | 49 ++ .../schemas/attempt-completion-schema.ts | 39 + .../prompts/tools/schemas/base-tool-schema.ts | 109 +++ .../tools/schemas/browser-action-schema.ts | 101 +++ .../tools/schemas/codebase-search-schema.ts | 46 ++ .../tools/schemas/execute-command-schema.ts | 27 + .../schemas/fetch-instructions-schema.ts | 42 + .../tools/schemas/generate-image-schema.ts | 41 + .../tools/schemas/insert-content-schema.ts | 62 ++ .../list-code-definition-names-schema.ts | 40 + .../tools/schemas/list-files-schema.ts | 42 + .../prompts/tools/schemas/new-task-schema.ts | 94 +++ .../prompts/tools/schemas/read-file-schema.ts | 155 ++++ .../tools/schemas/run-slash-command-schema.ts | 53 ++ .../schemas/search-and-replace-schema.ts | 91 +++ .../tools/schemas/search-files-schema.ts | 51 ++ .../tools/schemas/switch-mode-schema.ts | 40 + .../prompts/tools/schemas/tool-registry.ts | 151 ++++ .../tools/schemas/update-todo-list-schema.ts | 90 +++ .../tools/schemas/use-mcp-tool-schema.ts | 66 ++ .../tools/schemas/write-to-file-schema.ts | 69 ++ src/core/prompts/tools/tool-availability.ts | 106 +++ src/core/prompts/types.ts | 1 + src/core/task/Task.ts | 268 +++++-- .../task/__tests__/tool-call-helper.spec.ts | 447 +++++++++++ src/core/task/tool-call-helper.ts | 722 ++++++++++++++++++ .../__tests__/applyDiffTool.tool-call.spec.ts | 110 +++ .../__tests__/attemptCompletionTool.spec.ts | 62 ++ .../__tests__/multiApplyDiffTool.spec.ts | 120 +++ src/core/tools/applyDiffTool.ts | 31 + src/core/tools/attemptCompletionTool.ts | 16 +- src/core/tools/multiApplyDiffTool.ts | 40 +- src/core/tools/writeToFileTool.ts | 7 +- src/core/webview/generateSystemPrompt.ts | 3 + src/shared/tools.ts | 11 + .../src/components/settings/ApiOptions.tsx | 11 + .../src/components/settings/SettingsView.tsx | 6 + 
.../settings/ToolCallSettingsControl.tsx | 35 + webview-ui/src/i18n/locales/ca/settings.json | 4 + webview-ui/src/i18n/locales/de/settings.json | 4 + webview-ui/src/i18n/locales/en/settings.json | 4 + webview-ui/src/i18n/locales/es/settings.json | 4 + webview-ui/src/i18n/locales/fr/settings.json | 4 + webview-ui/src/i18n/locales/hi/settings.json | 4 + webview-ui/src/i18n/locales/id/settings.json | 4 + webview-ui/src/i18n/locales/it/settings.json | 4 + webview-ui/src/i18n/locales/ja/settings.json | 4 + webview-ui/src/i18n/locales/ko/settings.json | 4 + webview-ui/src/i18n/locales/nl/settings.json | 4 + webview-ui/src/i18n/locales/pl/settings.json | 4 + .../src/i18n/locales/pt-BR/settings.json | 4 + webview-ui/src/i18n/locales/ru/settings.json | 4 + webview-ui/src/i18n/locales/tr/settings.json | 4 + webview-ui/src/i18n/locales/vi/settings.json | 4 + .../src/i18n/locales/zh-CN/settings.json | 4 + .../src/i18n/locales/zh-TW/settings.json | 4 + 85 files changed, 4929 insertions(+), 150 deletions(-) create mode 100644 src/api/providers/__tests__/openai-tool-call.spec.ts create mode 100644 src/api/providers/__tests__/openrouter-tool-call.spec.ts create mode 100644 src/api/providers/__tests__/roo-tool-call.spec.ts create mode 100644 src/core/prompts/tools/schemas/access-mcp-resource-schema.ts create mode 100644 src/core/prompts/tools/schemas/apply-diff-schema.ts create mode 100644 src/core/prompts/tools/schemas/ask-followup-question-schema.ts create mode 100644 src/core/prompts/tools/schemas/attempt-completion-schema.ts create mode 100644 src/core/prompts/tools/schemas/base-tool-schema.ts create mode 100644 src/core/prompts/tools/schemas/browser-action-schema.ts create mode 100644 src/core/prompts/tools/schemas/codebase-search-schema.ts create mode 100644 src/core/prompts/tools/schemas/execute-command-schema.ts create mode 100644 src/core/prompts/tools/schemas/fetch-instructions-schema.ts create mode 100644 src/core/prompts/tools/schemas/generate-image-schema.ts create 
mode 100644 src/core/prompts/tools/schemas/insert-content-schema.ts create mode 100644 src/core/prompts/tools/schemas/list-code-definition-names-schema.ts create mode 100644 src/core/prompts/tools/schemas/list-files-schema.ts create mode 100644 src/core/prompts/tools/schemas/new-task-schema.ts create mode 100644 src/core/prompts/tools/schemas/read-file-schema.ts create mode 100644 src/core/prompts/tools/schemas/run-slash-command-schema.ts create mode 100644 src/core/prompts/tools/schemas/search-and-replace-schema.ts create mode 100644 src/core/prompts/tools/schemas/search-files-schema.ts create mode 100644 src/core/prompts/tools/schemas/switch-mode-schema.ts create mode 100644 src/core/prompts/tools/schemas/tool-registry.ts create mode 100644 src/core/prompts/tools/schemas/update-todo-list-schema.ts create mode 100644 src/core/prompts/tools/schemas/use-mcp-tool-schema.ts create mode 100644 src/core/prompts/tools/schemas/write-to-file-schema.ts create mode 100644 src/core/prompts/tools/tool-availability.ts create mode 100644 src/core/task/__tests__/tool-call-helper.spec.ts create mode 100644 src/core/task/tool-call-helper.ts create mode 100644 src/core/tools/__tests__/applyDiffTool.tool-call.spec.ts create mode 100644 webview-ui/src/components/settings/ToolCallSettingsControl.tsx diff --git a/packages/types/src/provider-settings.ts b/packages/types/src/provider-settings.ts index 4dfeacbf07cc..d740de23b68f 100644 --- a/packages/types/src/provider-settings.ts +++ b/packages/types/src/provider-settings.ts @@ -100,6 +100,7 @@ const baseProviderSettingsSchema = z.object({ includeMaxTokens: z.boolean().optional(), diffEnabled: z.boolean().optional(), todoListEnabled: z.boolean().optional(), + toolCallEnabled: z.boolean().optional(), fuzzyMatchThreshold: z.number().optional(), modelTemperature: z.number().nullish(), rateLimitSeconds: z.number().optional(), diff --git a/src/api/index.ts b/src/api/index.ts index ac0096767624..a62a34c5a82f 100644 --- a/src/api/index.ts +++ 
b/src/api/index.ts @@ -1,6 +1,6 @@ import { Anthropic } from "@anthropic-ai/sdk" -import type { ProviderSettings, ModelInfo } from "@roo-code/types" +import type { ProviderSettings, ModelInfo, ToolName } from "@roo-code/types" import { ApiStream } from "./transform/stream" @@ -42,6 +42,7 @@ import { DeepInfraHandler, } from "./providers" import { NativeOllamaHandler } from "./providers/native-ollama" +import { ToolArgs } from "../core/prompts/tools/types" export interface SingleCompletionHandler { completePrompt(prompt: string): Promise @@ -65,6 +66,14 @@ export interface ApiHandlerCreateMessageMetadata { * @default true */ store?: boolean + /** + * Names of the tools offered to the model as native tool calls; handlers that support it send the generated function-call schemas with tool_choice "auto" when this list is non-empty. + */ + tools?: ToolName[] + /** + * Arguments passed to the tool registry together with `tools` when generating those tool schemas. + */ + toolArgs?: ToolArgs } export interface ApiHandler { diff --git a/src/api/providers/__tests__/lmstudio.spec.ts b/src/api/providers/__tests__/lmstudio.spec.ts index 0adebdeea7ac..7dd415c4e8d1 100644 --- a/src/api/providers/__tests__/lmstudio.spec.ts +++ b/src/api/providers/__tests__/lmstudio.spec.ts @@ -164,4 +164,115 @@ describe("LmStudioHandler", () => { expect(modelInfo.info.contextWindow).toBe(128_000) }) }) + describe("LmStudioHandler Tool Calling", () => { + let handler: LmStudioHandler + let mockOptions: ApiHandlerOptions + + beforeEach(() => { + mockOptions = { + apiModelId: "local-model", + lmStudioModelId: "local-model", + lmStudioBaseUrl: "http://localhost:1234", + } + handler = new LmStudioHandler(mockOptions) + mockCreate.mockClear() + }) + + describe("createMessage with tool calls", () => { + const systemPrompt = "You are a helpful assistant."
+ const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: "Hello!", + }, + ] + + it("should include tool call parameters when tools are provided", async () => { + mockCreate.mockImplementation(async function* () { + yield { + choices: [ + { + delta: { content: "Test response" }, + index: 0, + }, + ], + usage: null, + } + }) + + const stream = handler.createMessage(systemPrompt, messages, { + tools: ["test_tool" as any], + taskId: "test-task-id", + }) + + // Consume the stream + for await (const _ of stream) { + // + } + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + tools: expect.any(Array), + tool_choice: "auto", + }), + ) + }) + + it("should yield tool_call chunks when model returns tool calls", async () => { + const toolCallChunk = { + choices: [ + { + delta: { + tool_calls: [ + { + index: 0, + id: "tool-call-1", + type: "function", + function: { + name: "test_tool", + arguments: '{"param1":"value1"}', + }, + }, + ], + }, + index: 0, + }, + ], + } + const finalChunk = { + choices: [ + { + delta: {}, + finish_reason: "tool_calls", + }, + ], + usage: { + prompt_tokens: 10, + completion_tokens: 5, + total_tokens: 15, + }, + } + + mockCreate.mockImplementation(async function* () { + yield toolCallChunk + yield finalChunk + }) + + const stream = handler.createMessage(systemPrompt, messages, { + tools: ["test_tool" as any], + taskId: "test-task-id", + }) + + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + const toolCallChunks = chunks.filter((c) => c.type === "tool_call") + expect(toolCallChunks.length).toBe(1) + expect(toolCallChunks[0].toolCalls).toEqual(toolCallChunk.choices[0].delta.tool_calls) + expect(toolCallChunks[0].toolCallType).toBe("openai") + }) + }) + }) }) diff --git a/src/api/providers/__tests__/openai-tool-call.spec.ts b/src/api/providers/__tests__/openai-tool-call.spec.ts new file mode 100644 index 000000000000..59a70a1f85d5 --- /dev/null +++ 
b/src/api/providers/__tests__/openai-tool-call.spec.ts @@ -0,0 +1,201 @@ +// npx vitest run api/providers/__tests__/openai-tool-call.spec.ts + +import { OpenAiHandler } from "../openai" +import { ApiHandlerOptions } from "../../../shared/api" +import OpenAI from "openai" +import { getToolRegistry } from "../../../core/prompts/tools/schemas/tool-registry" +import { ToolName } from "@roo-code/types" + +const mockCreate = vitest.fn() +const mockGenerateFunctionCallSchemas = vitest.fn() + +vitest.mock("openai", () => { + const mockConstructor = vitest.fn() + return { + __esModule: true, + default: mockConstructor.mockImplementation(() => ({ + chat: { + completions: { + create: mockCreate, + }, + }, + })), + } +}) + +vitest.mock("../../../core/prompts/tools/schemas/tool-registry", () => ({ + getToolRegistry: () => ({ + generateFunctionCallSchemas: mockGenerateFunctionCallSchemas, + }), +})) + +describe("OpenAiHandler Tool Call", () => { + let handler: OpenAiHandler + let mockOptions: ApiHandlerOptions + + beforeEach(() => { + mockOptions = { + openAiApiKey: "test-api-key", + openAiModelId: "gpt-4", + openAiBaseUrl: "https://api.openai.com/v1", + } + handler = new OpenAiHandler(mockOptions) + mockCreate.mockClear() + mockGenerateFunctionCallSchemas.mockClear() + }) + + it("should include tools and tool_choice in the request when metadata.tools are provided", async () => { + const systemPrompt = "You are a helpful assistant." 
+ const messages = [ + { + role: "user" as const, + content: "Hello!", + }, + ] + const metadata = { + taskId: "test-task-id", + tools: ["read_file" as ToolName], + toolArgs: { cwd: ".", supportsComputerUse: true }, + } + + mockGenerateFunctionCallSchemas.mockReturnValue([ + { + type: "function" as const, + function: { + name: "read_file", + description: "A function to interact with files.", + parameters: {}, + }, + }, + ]) + + mockCreate.mockImplementation(async function* () { + yield { + choices: [ + { + delta: { content: "Test response" }, + index: 0, + }, + ], + usage: null, + } + }) + + const stream = handler.createMessage(systemPrompt, messages, metadata) + + for await (const _ of stream) { + // Consume stream + } + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + tools: [ + { + type: "function", + function: { + name: "read_file", + description: "A function to interact with files.", + parameters: {}, + }, + }, + ], + tool_choice: "auto", + }), + expect.any(Object), + ) + }) + + it("should yield a tool_call event when the API returns tool_calls", async () => { + const systemPrompt = "You are a helpful assistant." 
+ const messages = [ + { + role: "user" as const, + content: "Hello!", + }, + ] + const metadata = { + taskId: "test-task-id", + tools: ["write_to_file" as ToolName], + toolArgs: { cwd: ".", supportsComputerUse: true }, + } + + mockCreate.mockImplementation(async function* () { + yield { + choices: [ + { + delta: { + tool_calls: [ + { + index: 0, + id: "call_123", + type: "function", + function: { + name: "write_to_file", + arguments: '{"query":"test"}', + }, + }, + ], + }, + index: 0, + }, + ], + } + }) + + const stream = handler.createMessage(systemPrompt, messages, metadata) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + const toolCallChunk = chunks.find((chunk) => chunk.type === "tool_call") + + expect(toolCallChunk).toBeDefined() + expect(toolCallChunk.toolCalls).toEqual([ + { + index: 0, + id: "call_123", + type: "function", + function: { + name: "write_to_file", + arguments: '{"query":"test"}', + }, + }, + ]) + }) + + it("should not include tools and tool_choice in the request when metadata.tools are not provided", async () => { + const systemPrompt = "You are a helpful assistant." 
+ const messages = [ + { + role: "user" as const, + content: "Hello!", + }, + ] + + mockCreate.mockImplementation(async function* () { + yield { + choices: [ + { + delta: { content: "Test response" }, + index: 0, + }, + ], + usage: null, + } + }) + + const stream = handler.createMessage(systemPrompt, messages) + for await (const _ of stream) { + // Consume stream + } + + expect(mockCreate).toHaveBeenCalledWith( + expect.not.objectContaining({ + tools: expect.any(Array), + tool_choice: expect.any(String), + }), + expect.any(Object), + ) + }) +}) diff --git a/src/api/providers/__tests__/openrouter-tool-call.spec.ts b/src/api/providers/__tests__/openrouter-tool-call.spec.ts new file mode 100644 index 000000000000..c211e8014525 --- /dev/null +++ b/src/api/providers/__tests__/openrouter-tool-call.spec.ts @@ -0,0 +1,161 @@ +// npx vitest run src/api/providers/__tests__/openrouter-tool-call.spec.ts + +// Mock vscode first to avoid import errors +vitest.mock("vscode", () => ({})) + +import { Anthropic } from "@anthropic-ai/sdk" +import OpenAI from "openai" + +import { OpenRouterHandler } from "../openrouter" +import { ApiHandlerOptions } from "../../../shared/api" +import { Package } from "../../../shared/package" +import { getToolRegistry } from "../../../core/prompts/tools/schemas/tool-registry" +import { ApiHandlerCreateMessageMetadata } from "../.." 
+ +// Mock dependencies +vitest.mock("openai") +vitest.mock("delay", () => ({ default: vitest.fn(() => Promise.resolve()) })) +vitest.mock("../fetchers/modelCache", () => ({ + getModels: vitest.fn().mockImplementation(() => { + return Promise.resolve({ + "anthropic/claude-sonnet-4": { + maxTokens: 8192, + contextWindow: 200000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 3, + outputPrice: 15, + cacheWritesPrice: 3.75, + cacheReadsPrice: 0.3, + description: "Claude 3.7 Sonnet", + thinking: false, + supportsComputerUse: true, + }, + }) + }), +})) +vitest.mock("../../../core/prompts/tools/schemas/tool-registry") + +describe("OpenRouterHandler Tool Call", () => { + const mockOptions: ApiHandlerOptions = { + openRouterApiKey: "test-key", + openRouterModelId: "anthropic/claude-sonnet-4", + } + + beforeEach(() => { + vitest.clearAllMocks() + const mockToolRegistry = { + generateFunctionCallSchemas: vitest.fn().mockReturnValue([ + { + type: "function", + function: { + name: "read_file", + description: "A test tool", + parameters: { + type: "object", + properties: {}, + required: [], + }, + }, + }, + ]), + } + ;(getToolRegistry as any).mockReturnValue(mockToolRegistry) + }) + + it("should include tool call parameters when tools are provided", async () => { + const handler = new OpenRouterHandler(mockOptions) + + const mockStream = { + async *[Symbol.asyncIterator]() { + yield { + id: mockOptions.openRouterModelId, + choices: [{ delta: { content: "test response" } }], + } + }, + } + + const mockCreate = vitest.fn().mockResolvedValue(mockStream) + + ;(OpenAI as any).prototype.chat = { + completions: { create: mockCreate }, + } as any + + const systemPrompt = "test system prompt" + const messages: Anthropic.Messages.MessageParam[] = [{ role: "user" as const, content: "test message" }] + const metadata: ApiHandlerCreateMessageMetadata = { + taskId: "test-task-id", + tools: ["read_file"], + toolArgs: {} as any, + } + + const generator = 
handler.createMessage(systemPrompt, messages, metadata) + await generator.next() + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + tools: [ + { + type: "function", + function: { + name: "read_file", + description: "A test tool", + parameters: { + type: "object", + properties: {}, + required: [], + }, + }, + }, + ], + tool_choice: "auto", + }), + ) + }) + + it("should yield tool_call chunk when tool_calls are in the stream", async () => { + const handler = new OpenRouterHandler(mockOptions) + const toolCalls = [ + { + index: 0, + id: "tool-call-1", + function: { name: "read_file", arguments: "{}" }, + type: "function", + }, + ] + const mockStream = { + async *[Symbol.asyncIterator]() { + yield { + id: mockOptions.openRouterModelId, + choices: [{ delta: { tool_calls: toolCalls } }], + } + }, + } + + const mockCreate = vitest.fn().mockResolvedValue(mockStream) + + ;(OpenAI as any).prototype.chat = { + completions: { create: mockCreate }, + } as any + + const systemPrompt = "test system prompt" + const messages: Anthropic.Messages.MessageParam[] = [{ role: "user" as const, content: "test message" }] + const metadata: ApiHandlerCreateMessageMetadata = { + taskId: "test-task-id", + tools: ["read_file"], + toolArgs: {} as any, + } + + const generator = handler.createMessage(systemPrompt, messages, metadata) + const chunks = [] + for await (const chunk of generator) { + chunks.push(chunk) + } + + expect(chunks).toContainEqual({ + type: "tool_call", + toolCalls, + toolCallType: "openai", + }) + }) +}) diff --git a/src/api/providers/__tests__/roo-tool-call.spec.ts b/src/api/providers/__tests__/roo-tool-call.spec.ts new file mode 100644 index 000000000000..cc12b5f549fd --- /dev/null +++ b/src/api/providers/__tests__/roo-tool-call.spec.ts @@ -0,0 +1,143 @@ +// npx vitest run api/providers/__tests__/roo-tool-call.spec.ts + +import { Anthropic } from "@anthropic-ai/sdk" + +import { ApiHandlerOptions } from "../../../shared/api" + +// Mock OpenAI client 
+const mockCreate = vitest.fn() + +vitest.mock("openai", () => { + return { + __esModule: true, + default: vitest.fn().mockImplementation(() => ({ + chat: { + completions: { + create: mockCreate.mockImplementation(async (options) => { + if (!options.stream) { + return { + id: "test-completion", + choices: [ + { + message: { role: "assistant", content: "Test response" }, + finish_reason: "stop", + index: 0, + }, + ], + usage: { + prompt_tokens: 10, + completion_tokens: 5, + total_tokens: 15, + }, + } + } + + return { + [Symbol.asyncIterator]: async function* () { + yield { + choices: [ + { + delta: { + tool_calls: [ + { + index: 0, + id: "tool-call-1", + function: { + name: "test-tool", + arguments: '{"arg1":"value1"}', + }, + type: "function", + }, + ], + }, + index: 0, + }, + ], + usage: null, + } + yield { + choices: [ + { + delta: {}, + index: 0, + }, + ], + usage: { + prompt_tokens: 10, + completion_tokens: 5, + total_tokens: 15, + }, + } + }, + } + }), + }, + }, + })), + } +}) + +// Mock CloudService +const mockGetSessionTokenFn = vitest.fn() +const mockHasInstanceFn = vitest.fn() + +vitest.mock("@roo-code/cloud", () => ({ + CloudService: { + hasInstance: () => mockHasInstanceFn(), + get instance() { + return { + authService: { + getSessionToken: () => mockGetSessionTokenFn(), + }, + } + }, + }, +})) + +// Import after mocks are set up +import { RooHandler } from "../roo" + +describe("RooHandler Tool Call", () => { + let handler: RooHandler + let mockOptions: ApiHandlerOptions + const systemPrompt = "You are a helpful assistant." 
+ const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: "Hello!", + }, + ] + + beforeEach(() => { + mockOptions = { + apiModelId: "xai/grok-code-fast-1", + } + mockHasInstanceFn.mockReturnValue(true) + mockGetSessionTokenFn.mockReturnValue("test-session-token") + mockCreate.mockClear() + vitest.clearAllMocks() + }) + + it("should handle tool_calls in streaming responses", async () => { + handler = new RooHandler(mockOptions) + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + const toolCallChunks = chunks.filter((chunk) => chunk.type === "tool_call") + expect(toolCallChunks).toHaveLength(1) + expect(toolCallChunks[0].toolCalls).toEqual([ + { + index: 0, + id: "tool-call-1", + function: { + name: "test-tool", + arguments: '{"arg1":"value1"}', + }, + type: "function", + }, + ]) + }) +}) diff --git a/src/api/providers/base-openai-compatible-provider.ts b/src/api/providers/base-openai-compatible-provider.ts index fb6c5d03770e..cb1e8b78bd6c 100644 --- a/src/api/providers/base-openai-compatible-provider.ts +++ b/src/api/providers/base-openai-compatible-provider.ts @@ -11,6 +11,7 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ". 
import { DEFAULT_HEADERS } from "./constants" import { BaseProvider } from "./base-provider" import { handleOpenAIError } from "./utils/openai-error-handler" +import { ToolRegistry } from "../../core/prompts/tools/schemas/tool-registry" type BaseOpenAiCompatibleProviderOptions = ApiHandlerOptions & { providerName: string @@ -85,6 +86,11 @@ export abstract class BaseOpenAiCompatibleProvider stream_options: { include_usage: true }, } + if (metadata?.tools && metadata.tools.length > 0) { + params.tools = ToolRegistry.getInstance().generateFunctionCallSchemas(metadata.tools, metadata.toolArgs) + params.tool_choice = "auto" + } + try { return this.client.chat.completions.create(params, requestOptions) } catch (error) { diff --git a/src/api/providers/base-provider.ts b/src/api/providers/base-provider.ts index 1abbf5f558cb..0890231158bb 100644 --- a/src/api/providers/base-provider.ts +++ b/src/api/providers/base-provider.ts @@ -32,4 +32,34 @@ export abstract class BaseProvider implements ApiHandler { return countTokens(content, { useWorker: true }) } + + /** + * Convert tool schemas to text format for token counting + */ + protected convertToolSchemasToText(toolSchemas: Anthropic.ToolUnion[]): string { + if (toolSchemas.length === 0) { + return "" + } + + const toolsDescription = toolSchemas + .map((tool) => { + // Handle different tool types by accessing properties safely + const toolName = tool.name + let toolText = `Tool: ${toolName}\n` + + // Try to access description and input_schema properties + if ("description" in tool) { + toolText += `Description: ${tool.description}\n` + } + + if ("input_schema" in tool && tool.input_schema && typeof tool.input_schema === "object") { + toolText += `Parameters:\n${JSON.stringify(tool.input_schema, null, 2)}\n` + } + + return toolText + }) + .join("\n---\n") + + return `Available Tools:\n${toolsDescription}` + } } diff --git a/src/api/providers/lm-studio.ts b/src/api/providers/lm-studio.ts index 6c58a96ae1fa..1e5310fa1189 100644 
--- a/src/api/providers/lm-studio.ts +++ b/src/api/providers/lm-studio.ts @@ -16,6 +16,7 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ". import { getModels, getModelsFromCache } from "./fetchers/modelCache" import { getApiRequestTimeout } from "./utils/timeout-config" import { handleOpenAIError } from "./utils/openai-error-handler" +import { getToolRegistry } from "../../core/prompts/tools/schemas/tool-registry" export class LmStudioHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions @@ -45,6 +46,8 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan { role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages), ] + const toolCallEnabled = metadata?.tools && metadata.tools.length > 0 + const toolRegistry = getToolRegistry() // ------------------------- // Track token usage @@ -73,7 +76,17 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan let inputTokens = 0 try { - inputTokens = await this.countTokens([{ type: "text", text: systemPrompt }, ...toContentBlocks(messages)]) + const inputMessages: Anthropic.Messages.ContentBlockParam[] = [{ type: "text", text: systemPrompt }] + if (toolCallEnabled) { + const toolSchemas: Anthropic.ToolUnion[] = toolRegistry.generateAnthropicToolSchemas( + metadata.tools!, + metadata.toolArgs, + ) + const toolsText = this.convertToolSchemasToText(toolSchemas) + inputMessages.push({ type: "text", text: toolsText }) + } + inputMessages.push(...toContentBlocks(messages)) + inputTokens = await this.countTokens(inputMessages) } catch (err) { console.error("[LmStudio] Failed to count input tokens:", err) inputTokens = 0 @@ -88,6 +101,10 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan temperature: this.options.modelTemperature ?? 
LMSTUDIO_DEFAULT_TEMPERATURE, stream: true, } + if (toolCallEnabled) { + params.tools = toolRegistry.generateFunctionCallSchemas(metadata.tools!, metadata.toolArgs) + params.tool_choice = "auto" + } if (this.options.lmStudioSpeculativeDecodingEnabled && this.options.lmStudioDraftModelId) { params.draft_model = this.options.lmStudioDraftModelId @@ -118,6 +135,9 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan yield processedChunk } } + if (delta?.tool_calls) { + yield { type: "tool_call", toolCalls: delta.tool_calls, toolCallType: "openai" } + } } for (const processedChunk of matcher.final()) { diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index aebe671712a7..561dc34654f5 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -25,6 +25,7 @@ import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { getApiRequestTimeout } from "./utils/timeout-config" import { handleOpenAIError } from "./utils/openai-error-handler" +import { getToolRegistry } from "../../core/prompts/tools/schemas/tool-registry" // TODO: Rename this to OpenAICompatibleHandler. Also, I think the // `OpenAINativeHandler` can subclass from this, since it's obviously @@ -165,6 +166,10 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl ...(isGrokXAI ? 
{} : { stream_options: { include_usage: true } }), ...(reasoning && reasoning), } + if (metadata?.tools && metadata.tools.length > 0) { + requestOptions.tools = getToolRegistry().generateFunctionCallSchemas(metadata.tools!, metadata.toolArgs) + requestOptions.tool_choice = "auto" + } // Add max_tokens if needed this.addMaxTokensIfNeeded(requestOptions, modelInfo) @@ -205,6 +210,9 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl text: (delta.reasoning_content as string | undefined) || "", } } + if (delta?.tool_calls) { + yield { type: "tool_call", toolCalls: delta.tool_calls, toolCallType: "openai" } + } if (chunk.usage) { lastUsage = chunk.usage } diff --git a/src/api/providers/openrouter.ts b/src/api/providers/openrouter.ts index 580b17331194..395a0de6abac 100644 --- a/src/api/providers/openrouter.ts +++ b/src/api/providers/openrouter.ts @@ -24,8 +24,9 @@ import { getModelEndpoints } from "./fetchers/modelEndpointCache" import { DEFAULT_HEADERS } from "./constants" import { BaseProvider } from "./base-provider" -import type { SingleCompletionHandler } from "../index" +import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { handleOpenAIError } from "./utils/openai-error-handler" +import { getToolRegistry } from "../../core/prompts/tools/schemas/tool-registry" // Image generation types interface ImageGenerationResponse { @@ -101,6 +102,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH override async *createMessage( systemPrompt: string, messages: Anthropic.Messages.MessageParam[], + metadata?: ApiHandlerCreateMessageMetadata, ): AsyncGenerator { const model = await this.fetchModel() @@ -162,6 +164,10 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH ...(transforms && { transforms }), ...(reasoning && { reasoning }), } + if (metadata?.tools && metadata.tools.length > 0) { + completionParams.tools = 
getToolRegistry().generateFunctionCallSchemas(metadata.tools!, metadata.toolArgs!) + completionParams.tool_choice = "auto" + } let stream try { @@ -190,6 +196,10 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH yield { type: "text", text: delta.content } } + if (delta?.tool_calls) { + yield { type: "tool_call", toolCalls: delta.tool_calls, toolCallType: "openai" } + } + if (chunk.usage) { lastUsage = chunk.usage } diff --git a/src/api/providers/roo.ts b/src/api/providers/roo.ts index 44b016086276..589d5fdb065c 100644 --- a/src/api/providers/roo.ts +++ b/src/api/providers/roo.ts @@ -55,6 +55,10 @@ export class RooHandler extends BaseOpenAiCompatibleProvider { } } + if (delta?.tool_calls) { + yield { type: "tool_call", toolCalls: delta.tool_calls, toolCallType: "openai" } + } + if ("reasoning_content" in delta && typeof delta.reasoning_content === "string") { yield { type: "reasoning", diff --git a/src/api/transform/stream.ts b/src/api/transform/stream.ts index 8484e6259580..23d4605a2348 100644 --- a/src/api/transform/stream.ts +++ b/src/api/transform/stream.ts @@ -1,3 +1,5 @@ +import { ToolCallProviderType } from "../../shared/tools" + export type ApiStream = AsyncGenerator export type ApiStreamChunk = @@ -6,6 +8,7 @@ export type ApiStreamChunk = | ApiStreamReasoningChunk | ApiStreamGroundingChunk | ApiStreamError + | ApiStreamToolCallChunk export interface ApiStreamError { type: "error" @@ -43,3 +46,9 @@ export interface GroundingSource { url: string snippet?: string } + +export interface ApiStreamToolCallChunk { + type: "tool_call" + toolCalls: any + toolCallType: ToolCallProviderType +} diff --git a/src/core/assistant-message/AssistantMessageParser.ts b/src/core/assistant-message/AssistantMessageParser.ts index 364ec603f220..7dccbd77390e 100644 --- a/src/core/assistant-message/AssistantMessageParser.ts +++ b/src/core/assistant-message/AssistantMessageParser.ts @@ -1,6 +1,7 @@ import { type ToolName, toolNames } from 
"@roo-code/types" import { TextContent, ToolUse, ToolParamName, toolParamNames } from "../../shared/tools" import { AssistantMessageContent } from "./parseAssistantMessage" +import { ToolCallParam } from "../task/tool-call-helper" /** * Parser for assistant messages. Maintains state between chunks @@ -51,7 +52,7 @@ export class AssistantMessageParser { * Process a new chunk of text and update the parser state. * @param chunk The new chunk of text to process. */ - public processChunk(chunk: string): AssistantMessageContent[] { + public processChunk(chunk: string, toolCallParam?: ToolCallParam): AssistantMessageContent[] { if (this.accumulator.length + chunk.length > this.MAX_ACCUMULATOR_SIZE) { throw new Error("Assistant message exceeds maximum allowed size") } @@ -62,6 +63,9 @@ export class AssistantMessageParser { const char = chunk[i] this.accumulator += char const currentPosition = accumulatorStartLength + i + if (this.currentToolUse && toolCallParam?.anthropicContent) { + this.currentToolUse.toolUseParam = toolCallParam.anthropicContent + } // There should not be a param without a tool use. if (this.currentToolUse && this.currentParamName) { @@ -174,6 +178,11 @@ export class AssistantMessageParser { name: extractedToolName as ToolName, params: {}, partial: true, + toolUseId: toolCallParam && toolCallParam.toolUserId ? toolCallParam.toolUserId : undefined, + toolUseParam: + toolCallParam && toolCallParam?.anthropicContent + ? 
toolCallParam?.anthropicContent + : undefined, } this.currentToolUseStartIndex = this.accumulator.length diff --git a/src/core/assistant-message/__tests__/AssistantMessageParser.spec.ts b/src/core/assistant-message/__tests__/AssistantMessageParser.spec.ts index 6b7c3915ee7e..a8d7cd284c5c 100644 --- a/src/core/assistant-message/__tests__/AssistantMessageParser.spec.ts +++ b/src/core/assistant-message/__tests__/AssistantMessageParser.spec.ts @@ -392,3 +392,90 @@ describe("AssistantMessageParser (streaming)", () => { }) }) }) + +describe("AssistantMessageParser (tool-call)", () => { + let parser: AssistantMessageParser + + beforeEach(() => { + parser = new AssistantMessageParser() + }) + + it("should handle a tool use with toolCallParam providing a toolUseId", () => { + const message = "src/file.ts" + const toolCallParam: any = { + providerType: "openai", + toolName: "read_file", + toolUserId: "tool-use-id-123", + chunkContent: "", + originContent: [], + } + const result = parser.processChunk(message, toolCallParam) + const toolUse = result.find((block) => block.type === "tool_use") as ToolUse + expect(toolUse).toBeDefined() + expect(toolUse.toolUseId).toBe("tool-use-id-123") + expect(toolUse.name).toBe("read_file") + expect(toolUse.params.path).toBe("src/file.ts") + }) + + it("should handle a tool use with toolCallParam providing anthropicContent", () => { + const message = "src/file.ts" + const anthropicContent: any = { + id: "tool-use-id-456", + name: "read_file", + input: { path: "src/file.ts" }, + type: "tool_use", + } + const toolCallParam: any = { + providerType: "openai", + toolName: "read_file", + toolUserId: "tool-use-id-456", + chunkContent: "", + originContent: [], + anthropicContent: anthropicContent, + } + const result = parser.processChunk(message, toolCallParam) + const toolUse = result.find((block) => block.type === "tool_use") as ToolUse + expect(toolUse).toBeDefined() + expect(toolUse.toolUseId).toBe("tool-use-id-456") + 
expect(toolUse.toolUseParam).toEqual(anthropicContent) + expect(toolUse.name).toBe("read_file") + expect(toolUse.params.path).toBe("src/file.ts") + }) + + it("should update toolUseParam when anthropicContent is provided mid-stream", () => { + const toolCallParam: any = { + providerType: "openai", + toolName: "read_file", + toolUserId: "tool-use-id-789", + chunkContent: "", + originContent: [], + } + parser.processChunk("", toolCallParam) + const anthropicContent: any = { + id: "tool-use-id-789", + name: "read_file", + input: { path: "src/file.ts" }, + type: "tool_use", + } + const result = parser.processChunk("src/file.ts", { + ...toolCallParam, + anthropicContent: anthropicContent, + }) + const toolUse = result.find((block) => block.type === "tool_use") as ToolUse + expect(toolUse).toBeDefined() + expect(toolUse.toolUseId).toBe("tool-use-id-789") + expect(toolUse.toolUseParam).toEqual(anthropicContent) + expect(toolUse.params.path).toBe("src/file.ts") + }) + + it("should parse a tool use without toolCallParam", () => { + const message = "src/file.ts" + const result = parser.processChunk(message) + const toolUse = result.find((block) => block.type === "tool_use") as ToolUse + expect(toolUse).toBeDefined() + expect(toolUse.toolUseId).toBeUndefined() + expect(toolUse.toolUseParam).toBeUndefined() + expect(toolUse.name).toBe("read_file") + expect(toolUse.params.path).toBe("src/file.ts") + }) +}) diff --git a/src/core/assistant-message/__tests__/parseAssistantMessage.spec.ts b/src/core/assistant-message/__tests__/parseAssistantMessage.spec.ts index f5ae600beed4..27ccc8e506e4 100644 --- a/src/core/assistant-message/__tests__/parseAssistantMessage.spec.ts +++ b/src/core/assistant-message/__tests__/parseAssistantMessage.spec.ts @@ -4,6 +4,8 @@ import { TextContent, ToolUse } from "../../../shared/tools" import { AssistantMessageContent, parseAssistantMessage as parseAssistantMessageV1 } from "../parseAssistantMessage" import { parseAssistantMessageV2 } from 
"../parseAssistantMessageV2" +import { ToolCallParam } from "../../task/tool-call-helper" +import { parseAssistantMessage } from "../parseAssistantMessage" const isEmptyTextContent = (block: AssistantMessageContent) => block.type === "text" && (block as TextContent).content === "" @@ -338,3 +340,58 @@ const isEmptyTextContent = (block: AssistantMessageContent) => }) }) }) + +describe("parseAssistantMessage with toolCallParam", () => { + it("should assign toolUseId and toolUseParam when toolCallParam is provided", () => { + const message = "src/file.ts" + const toolCallParam: ToolCallParam = { + providerType: "anthropic", + toolName: "read_file", + chunkContent: "", + originContent: [], + toolUserId: "test-id", + anthropicContent: { + type: "tool_use", + id: "test-id", + name: "read_file", + input: { + path: "src/file.ts", + }, + }, + } + + const result = parseAssistantMessage(message, toolCallParam) + const toolUse = result.find((c) => c.type === "tool_use") as ToolUse + + expect(toolUse).toBeDefined() + expect(toolUse.toolUseId).toBe("test-id") + expect(toolUse.toolUseParam).toEqual(toolCallParam.anthropicContent) + }) + + it("should correctly assign toolUseParam during streaming", () => { + const message = "src/file.ts" // Partial message + const toolCallParam: ToolCallParam = { + providerType: "anthropic", + toolName: "read_file", + chunkContent: "", + originContent: [], + toolUserId: "test-id-2", + anthropicContent: { + type: "tool_use", + id: "test-id-2", + name: "read_file", + input: { + path: "src/file.ts", + }, + }, + } + + const result = parseAssistantMessage(message, toolCallParam) + const toolUse = result.find((c) => c.type === "tool_use") as ToolUse + + expect(toolUse).toBeDefined() + expect(toolUse.partial).toBe(true) + expect(toolUse.toolUseId).toBe("test-id-2") + expect(toolUse.toolUseParam).toEqual(toolCallParam.anthropicContent) + }) +}) diff --git a/src/core/assistant-message/parseAssistantMessage.ts 
b/src/core/assistant-message/parseAssistantMessage.ts index ebb8674c8fa4..7f9f8e0d1e8e 100644 --- a/src/core/assistant-message/parseAssistantMessage.ts +++ b/src/core/assistant-message/parseAssistantMessage.ts @@ -1,10 +1,14 @@ import { type ToolName, toolNames } from "@roo-code/types" import { TextContent, ToolUse, ToolParamName, toolParamNames } from "../../shared/tools" +import { ToolCallParam } from "../task/tool-call-helper" export type AssistantMessageContent = TextContent | ToolUse -export function parseAssistantMessage(assistantMessage: string): AssistantMessageContent[] { +export function parseAssistantMessage( + assistantMessage: string, + toolCallParam?: ToolCallParam, +): AssistantMessageContent[] { let contentBlocks: AssistantMessageContent[] = [] let currentTextContent: TextContent | undefined = undefined let currentTextContentStartIndex = 0 @@ -17,7 +21,10 @@ export function parseAssistantMessage(assistantMessage: string): AssistantMessag for (let i = 0; i < assistantMessage.length; i++) { const char = assistantMessage[i] accumulator += char - + // During streaming, opportunistically attach temporary toolUseParam (if available) + if (currentToolUse && toolCallParam?.anthropicContent) { + currentToolUse.toolUseParam = toolCallParam.anthropicContent + } // There should not be a param without a tool use. if (currentToolUse && currentParamName) { const currentParamValue = accumulator.slice(currentParamValueStartIndex) @@ -103,6 +110,9 @@ export function parseAssistantMessage(assistantMessage: string): AssistantMessag name: toolUseOpeningTag.slice(1, -1) as ToolName, params: {}, partial: true, + toolUseId: toolCallParam && toolCallParam.toolUserId ? toolCallParam.toolUserId : undefined, + toolUseParam: + toolCallParam && toolCallParam?.anthropicContent ? 
toolCallParam?.anthropicContent : undefined, } currentToolUseStartIndex = accumulator.length diff --git a/src/core/assistant-message/presentAssistantMessage.ts b/src/core/assistant-message/presentAssistantMessage.ts index 689675999fd1..2cc223571d7f 100644 --- a/src/core/assistant-message/presentAssistantMessage.ts +++ b/src/core/assistant-message/presentAssistantMessage.ts @@ -5,7 +5,7 @@ import type { ToolName, ClineAsk, ToolProgressStatus } from "@roo-code/types" import { TelemetryService } from "@roo-code/telemetry" import { defaultModeSlug, getModeBySlug } from "../../shared/modes" -import type { ToolParamName, ToolResponse } from "../../shared/tools" +import type { ToolParamName, ToolResponse, ToolUse } from "../../shared/tools" import { fetchInstructionsTool } from "../tools/fetchInstructionsTool" import { listFilesTool } from "../tools/listFilesTool" @@ -37,6 +37,7 @@ import { Task } from "../task/Task" import { codebaseSearchTool } from "../tools/codebaseSearchTool" import { experiments, EXPERIMENT_IDS } from "../../shared/experiments" import { applyDiffToolLegacy } from "../tools/applyDiffTool" +import Anthropic from "@anthropic-ai/sdk" /** * Processes and presents assistant message content to the user interface. @@ -65,6 +66,7 @@ export async function presentAssistantMessage(cline: Task) { return } + const toolCallEnabled = cline.apiConfiguration?.toolCallEnabled cline.presentAssistantMessageLocked = true cline.presentAssistantMessageHasPendingUpdates = false @@ -249,22 +251,72 @@ export async function presentAssistantMessage(cline: Task) { } if (cline.didAlreadyUseTool) { - // Ignore any content after a tool has already been used. - cline.userMessageContent.push({ + const rejectMessage: Anthropic.TextBlockParam = { type: "text", text: `Tool [${block.name}] was not executed because a tool has already been used in this message. Only one tool may be used per message. 
You must assess the first tool's result before proceeding to use the next tool.`, - }) + } + if (!block.toolUseId) { + // Ignore any content after a tool has already been used. + cline.userMessageContent.push(rejectMessage) + } else { + cline.userMessageContent.push({ + type: "tool_result", + tool_use_id: block.toolUseId, + content: [rejectMessage], + }) + } break } const pushToolResult = (content: ToolResponse) => { - cline.userMessageContent.push({ type: "text", text: `${toolDescription()} Result:` }) - + const newUserMessages: (Anthropic.TextBlockParam | Anthropic.ImageBlockParam)[] = [ + { type: "text", text: `${toolDescription()} Result:` }, + ] if (typeof content === "string") { - cline.userMessageContent.push({ type: "text", text: content || "(tool did not return anything)" }) + newUserMessages.push({ type: "text", text: content || "(tool did not return anything)" }) } else { - cline.userMessageContent.push(...content) + newUserMessages.push(...content) + } + + let hasSet = false + if (toolCallEnabled) { + const lastToolUseMessage = cline.assistantMessageContent.find( + (msg) => msg.type === "tool_use" && block.toolUseId && msg.toolUseId === block.toolUseId, + ) as ToolUse + if (lastToolUseMessage) { + const toolUseId = block.toolUseId! 
+ let toolResultMessage = cline.userMessageContent.find( + (msg) => msg.type === "tool_result" && msg.tool_use_id === toolUseId, + ) + if (toolResultMessage !== undefined && toolResultMessage.type === "tool_result") { + const content = toolResultMessage.content + const updateMessages: Array = [] + if (typeof content === "string") { + updateMessages.push({ type: "text", text: content }) + } else if (Array.isArray(content)) { + updateMessages.push(...content) + } else { + throw new Error( + "Unexpected tool result content type: " + JSON.stringify(toolResultMessage), + ) + } + updateMessages.push(...newUserMessages) + toolResultMessage.content = updateMessages + } else { + const toolMessage: Anthropic.ToolResultBlockParam = { + tool_use_id: toolUseId, + type: "tool_result", + content: newUserMessages, + } + cline.userMessageContent.push(toolMessage) + } + hasSet = true + } + } + + if (!hasSet) { + cline.userMessageContent.push(...newUserMessages) } // Once a tool result has been collected, ignore all other tool @@ -470,7 +522,7 @@ export async function presentAssistantMessage(cline: Task) { case "read_file": // Check if this model should use the simplified single-file read tool const modelId = cline.api.getModel().id - if (shouldUseSingleFileRead(modelId)) { + if (shouldUseSingleFileRead(modelId) && toolCallEnabled !== true) { await simpleReadFileTool( cline, block, diff --git a/src/core/config/ProviderSettingsManager.ts b/src/core/config/ProviderSettingsManager.ts index 21a7a060c1a6..a187c6c43ad1 100644 --- a/src/core/config/ProviderSettingsManager.ts +++ b/src/core/config/ProviderSettingsManager.ts @@ -33,6 +33,7 @@ export const providerProfilesSchema = z.object({ openAiHeadersMigrated: z.boolean().optional(), consecutiveMistakeLimitMigrated: z.boolean().optional(), todoListEnabledMigrated: z.boolean().optional(), + toolCallEnabledMigrated: z.boolean().optional(), }) .optional(), }) @@ -57,6 +58,7 @@ export class ProviderSettingsManager { openAiHeadersMigrated: 
true, // Mark as migrated on fresh installs consecutiveMistakeLimitMigrated: true, // Mark as migrated on fresh installs todoListEnabledMigrated: true, // Mark as migrated on fresh installs + toolCallEnabledMigrated: true, // Mark as migrated on fresh installs }, } @@ -157,6 +159,11 @@ export class ProviderSettingsManager { providerProfiles.migrations.todoListEnabledMigrated = true isDirty = true } + if (!providerProfiles.migrations.toolCallEnabledMigrated) { + await this.migrateToolCallEnabled(providerProfiles) + providerProfiles.migrations.toolCallEnabledMigrated = true + isDirty = true + } if (isDirty) { await this.store(providerProfiles) @@ -274,6 +281,17 @@ export class ProviderSettingsManager { console.error(`[MigrateTodoListEnabled] Failed to migrate todo list enabled setting:`, error) } } + private async migrateToolCallEnabled(providerProfiles: ProviderProfiles) { + try { + for (const [_name, apiConfig] of Object.entries(providerProfiles.apiConfigs)) { + if (apiConfig.toolCallEnabled === undefined) { + apiConfig.toolCallEnabled = false + } + } + } catch (error) { + console.error(`[migrateToolCallEnabled] Failed to migrate tool call enabled setting:`, error) + } + } /** * Clean model ID by removing prefix before "/" diff --git a/src/core/config/__tests__/ProviderSettingsManager.spec.ts b/src/core/config/__tests__/ProviderSettingsManager.spec.ts index e95d2b100ba2..58d46ae226c4 100644 --- a/src/core/config/__tests__/ProviderSettingsManager.spec.ts +++ b/src/core/config/__tests__/ProviderSettingsManager.spec.ts @@ -68,6 +68,7 @@ describe("ProviderSettingsManager", () => { openAiHeadersMigrated: true, consecutiveMistakeLimitMigrated: true, todoListEnabledMigrated: true, + toolCallEnabledMigrated: true, }, }), ) diff --git a/src/core/prompts/responses.ts b/src/core/prompts/responses.ts index fd51b18feda4..7bbd6ee84ebf 100644 --- a/src/core/prompts/responses.ts +++ b/src/core/prompts/responses.ts @@ -18,10 +18,10 @@ export const formatResponse = { rooIgnoreError: 
(path: string) => `Access to ${path} is blocked by the .rooignore file settings. You must try to continue in the task without using this file, or ask the user to update the .rooignore file.`, - noToolsUsed: () => + noToolsUsed: (toolCallEnabled: boolean) => `[ERROR] You did not use a tool in your previous response! Please retry with a tool use. -${toolUseInstructionsReminder} +${toolCallEnabled ? "" : toolUseInstructionsReminder} # Next Steps @@ -33,10 +33,15 @@ Otherwise, if you have not completed the task and do not need additional informa tooManyMistakes: (feedback?: string) => `You seem to be having trouble proceeding. The user has provided the following feedback to help guide you:\n\n${feedback}\n`, - missingToolParameterError: (paramName: string) => - `Missing value for required parameter '${paramName}'. Please retry with complete response.\n\n${toolUseInstructionsReminder}`, + missingToolParameterError: (paramName: string, toolCallEnabled: boolean) => + `Missing value for required parameter '${paramName}'. Please retry with complete response.\n\n${toolCallEnabled ? "" : toolUseInstructionsReminder}`, - lineCountTruncationError: (actualLineCount: number, isNewFile: boolean, diffStrategyEnabled: boolean = false) => { + lineCountTruncationError: ( + actualLineCount: number, + isNewFile: boolean, + diffStrategyEnabled: boolean = false, + toolCallEnabled: boolean = false, + ) => { const truncationMessage = `Note: Your response may have been truncated because it exceeded your output limit. You wrote ${actualLineCount} lines of content, but the line_count parameter was either missing or not included in your response.` const newFileGuidance = @@ -66,7 +71,7 @@ Otherwise, if you have not completed the task and do not need additional informa `RECOMMENDED APPROACH:\n` + `${existingFileApproaches.join("\n")}\n` - return `${isNewFile ? newFileGuidance : existingFileGuidance}\n${toolUseInstructionsReminder}` + return `${isNewFile ? 
newFileGuidance : existingFileGuidance}\n${toolCallEnabled ? "" : toolUseInstructionsReminder}` }, invalidMcpToolArgumentError: (serverName: string, toolName: string) => diff --git a/src/core/prompts/sections/__tests__/tool-use-guidelines.spec.ts b/src/core/prompts/sections/__tests__/tool-use-guidelines.spec.ts index 98e4da3a731e..e2ddbcfc5e52 100644 --- a/src/core/prompts/sections/__tests__/tool-use-guidelines.spec.ts +++ b/src/core/prompts/sections/__tests__/tool-use-guidelines.spec.ts @@ -1,5 +1,6 @@ import { getToolUseGuidelinesSection } from "../tool-use-guidelines" import type { CodeIndexManager } from "../../../../services/code-index/manager" +import type { SystemPromptSettings } from "../../types" describe("getToolUseGuidelinesSection", () => { // Mock CodeIndexManager with codebase search available @@ -38,7 +39,25 @@ describe("getToolUseGuidelinesSection", () => { expect(guidelines).toContain("2. **CRITICAL:") expect(guidelines).toContain("3. Choose the most appropriate tool") expect(guidelines).toContain("4. If multiple actions are needed") - expect(guidelines).toContain("5. Formulate your tool use") + const guidelinesWithToolCall = getToolUseGuidelinesSection(mockCodeIndexManagerEnabled, { + toolCallEnabled: true, + maxConcurrentFileReads: 5, + todoListEnabled: true, + useAgentRules: true, + newTaskRequireTodos: true, + }) + + expect(guidelinesWithToolCall).not.toContain("5. Formulate your tool use") + + const guidelinesWithoutToolCall = getToolUseGuidelinesSection(mockCodeIndexManagerEnabled, { + toolCallEnabled: false, + maxConcurrentFileReads: 5, + todoListEnabled: true, + useAgentRules: true, + newTaskRequireTodos: true, + }) + + expect(guidelinesWithoutToolCall).toContain("5. Formulate your tool use") expect(guidelines).toContain("6. After each tool use") expect(guidelines).toContain("7. ALWAYS wait for user confirmation") }) @@ -62,7 +81,25 @@ describe("getToolUseGuidelinesSection", () => { expect(guidelines).toContain("1. 
In tags") expect(guidelines).toContain("2. Choose the most appropriate tool") expect(guidelines).toContain("3. If multiple actions are needed") - expect(guidelines).toContain("4. Formulate your tool use") + const guidelinesWithToolCall = getToolUseGuidelinesSection(mockCodeIndexManagerDisabled, { + toolCallEnabled: true, + maxConcurrentFileReads: 5, + todoListEnabled: true, + useAgentRules: true, + newTaskRequireTodos: true, + }) + + expect(guidelinesWithToolCall).not.toContain("4. Formulate your tool use") + + const guidelinesWithoutToolCall = getToolUseGuidelinesSection(mockCodeIndexManagerDisabled, { + toolCallEnabled: false, + maxConcurrentFileReads: 5, + todoListEnabled: true, + useAgentRules: true, + newTaskRequireTodos: true, + }) + + expect(guidelinesWithoutToolCall).toContain("4. Formulate your tool use") expect(guidelines).toContain("5. After each tool use") expect(guidelines).toContain("6. ALWAYS wait for user confirmation") }) diff --git a/src/core/prompts/sections/modes.ts b/src/core/prompts/sections/modes.ts index 9b863840c0c2..298fb17c64f2 100644 --- a/src/core/prompts/sections/modes.ts +++ b/src/core/prompts/sections/modes.ts @@ -5,8 +5,12 @@ import { promises as fs } from "fs" import type { ModeConfig } from "@roo-code/types" import { getAllModesWithPrompts } from "../../../shared/modes" +import { SystemPromptSettings } from "../types" -export async function getModesSection(context: vscode.ExtensionContext): Promise { +export async function getModesSection( + context: vscode.ExtensionContext, + settings?: SystemPromptSettings, +): Promise { const settingsDir = path.join(context.globalStorageUri.fsPath, "settings") await fs.mkdir(settingsDir, { recursive: true }) @@ -33,11 +37,14 @@ ${allModes .join("\n")}` modesContent += ` -If the user asks you to create or edit a new mode for this project, you should read the instructions by using the fetch_instructions tool, like this: +If the user asks you to create or edit a new mode for this project, you 
should read the instructions by using the fetch_instructions tool` + if (settings?.toolCallEnabled !== true) { + modesContent += `, like this: create_mode ` + } return modesContent } diff --git a/src/core/prompts/sections/tool-use-guidelines.ts b/src/core/prompts/sections/tool-use-guidelines.ts index a526bbc7112e..93cae7f4c6b7 100644 --- a/src/core/prompts/sections/tool-use-guidelines.ts +++ b/src/core/prompts/sections/tool-use-guidelines.ts @@ -1,6 +1,10 @@ import { CodeIndexManager } from "../../../services/code-index/manager" +import { SystemPromptSettings } from "../types" -export function getToolUseGuidelinesSection(codeIndexManager?: CodeIndexManager): string { +export function getToolUseGuidelinesSection( + codeIndexManager?: CodeIndexManager, + settings?: SystemPromptSettings, +): string { const isCodebaseSearchAvailable = codeIndexManager && codeIndexManager.isFeatureEnabled && @@ -34,7 +38,9 @@ export function getToolUseGuidelinesSection(codeIndexManager?: CodeIndexManager) guidelinesList.push( `${itemNumber++}. If multiple actions are needed, use one tool at a time per message to accomplish the task iteratively, with each tool use being informed by the result of the previous tool use. Do not assume the outcome of any tool use. Each step must be informed by the previous step's result.`, ) - guidelinesList.push(`${itemNumber++}. Formulate your tool use using the XML format specified for each tool.`) + if (settings?.toolCallEnabled !== true) { + guidelinesList.push(`${itemNumber++}. Formulate your tool use using the XML format specified for each tool.`) + } guidelinesList.push(`${itemNumber++}. After each tool use, the user will respond with the result of that tool use. This result will provide you with the necessary information to continue your task or make further decisions. This response may include: - Information about whether the tool succeeded or failed, along with any reasons for failure. 
- Linter errors that may have arisen due to the changes you made, which you'll need to address. diff --git a/src/core/prompts/sections/tool-use.ts b/src/core/prompts/sections/tool-use.ts index c598fabae34c..9fa11afedff9 100644 --- a/src/core/prompts/sections/tool-use.ts +++ b/src/core/prompts/sections/tool-use.ts @@ -1,11 +1,16 @@ -export function getSharedToolUseSection(): string { - return `==== +import { SystemPromptSettings } from "../types" + +export function getSharedToolUseSection(settings?: SystemPromptSettings): string { + let out = `==== TOOL USE You have access to a set of tools that are executed upon the user's approval. You can use one tool per message, and will receive the result of that tool use in the user's response. You use tools step-by-step to accomplish a given task, with each tool use informed by the result of the previous tool use. -# Tool Use Formatting +` + + if (settings?.toolCallEnabled !== true) { + out += `# Tool Use Formatting Tool uses are formatted using XML-style tags. The tool name itself becomes the XML tag name. Each parameter is enclosed within its own set of tags. Here's the structure: @@ -16,4 +21,6 @@ Tool uses are formatted using XML-style tags. The tool name itself becomes the X Always use the actual tool name as the XML tag name for proper parsing and execution.` + } + return out } diff --git a/src/core/prompts/system.ts b/src/core/prompts/system.ts index 3cc327c8151c..3113e6d00731 100644 --- a/src/core/prompts/system.ts +++ b/src/core/prompts/system.ts @@ -80,7 +80,7 @@ async function generatePrompt( const shouldIncludeMcp = hasMcpGroup && hasMcpServers const [modesSection, mcpServersSection] = await Promise.all([ - getModesSection(context), + getModesSection(context, settings), shouldIncludeMcp ? 
getMcpServersSection(mcpHub, effectiveDiffStrategy, enableMcpServerCreation) : Promise.resolve(""), @@ -92,7 +92,7 @@ async function generatePrompt( ${markdownFormattingSection()} -${getSharedToolUseSection()} +${getSharedToolUseSection(settings)} ${getToolDescriptionsForMode( mode, @@ -110,7 +110,7 @@ ${getToolDescriptionsForMode( modelId, )} -${getToolUseGuidelinesSection(codeIndexManager)} +${getToolUseGuidelinesSection(codeIndexManager, settings)} ${mcpServersSection} diff --git a/src/core/prompts/tools/index.ts b/src/core/prompts/tools/index.ts index c212b18a3de4..0f03c3a14bd8 100644 --- a/src/core/prompts/tools/index.ts +++ b/src/core/prompts/tools/index.ts @@ -1,8 +1,8 @@ import type { ToolName, ModeConfig } from "@roo-code/types" -import { TOOL_GROUPS, ALWAYS_AVAILABLE_TOOLS, DiffStrategy } from "../../../shared/tools" +import { DiffStrategy } from "../../../shared/tools" import { McpHub } from "../../../services/mcp/McpHub" -import { Mode, getModeConfig, isToolAllowedForMode, getGroupName } from "../../../shared/modes" +import { Mode } from "../../../shared/modes" import { ToolArgs } from "./types" import { getExecuteCommandDescription } from "./execute-command" @@ -28,6 +28,8 @@ import { getUpdateTodoListDescription } from "./update-todo-list" import { getRunSlashCommandDescription } from "./run-slash-command" import { getGenerateImageDescription } from "./generate-image" import { CodeIndexManager } from "../../../services/code-index/manager" +import { getToolRegistry } from "./schemas/tool-registry" +import { getToolAvailability, type ToolAvailabilityArgs } from "./tool-availability" // Map of tool names to their description functions const toolDescriptionMap: Record string | undefined> = { @@ -77,13 +79,15 @@ export function getToolDescriptionsForMode( enableMcpServerCreation?: boolean, modelId?: string, ): string { - const config = getModeConfig(mode, customModes) - const args: ToolArgs = { + const toolAvailabilityArgs: ToolAvailabilityArgs = { + mode, 
cwd, supportsComputerUse, + codeIndexManager, diffStrategy, browserViewportSize, mcpHub, + customModes, partialReadsEnabled, settings: { ...settings, @@ -93,65 +97,21 @@ export function getToolDescriptionsForMode( experiments, } - const tools = new Set() + const { xmlTools } = getToolAvailability(toolAvailabilityArgs) - // Add tools from mode's groups - config.groups.forEach((groupEntry) => { - const groupName = getGroupName(groupEntry) - const toolGroup = TOOL_GROUPS[groupName] - if (toolGroup) { - toolGroup.tools.forEach((tool) => { - if ( - isToolAllowedForMode( - tool as ToolName, - mode, - customModes ?? [], - undefined, - undefined, - experiments ?? {}, - ) - ) { - tools.add(tool) - } - }) - } - }) - - // Add always available tools - ALWAYS_AVAILABLE_TOOLS.forEach((tool) => tools.add(tool)) - - // Conditionally exclude codebase_search if feature is disabled or not configured - if ( - !codeIndexManager || - !(codeIndexManager.isFeatureEnabled && codeIndexManager.isFeatureConfigured && codeIndexManager.isInitialized) - ) { - tools.delete("codebase_search") - } - - // Conditionally exclude update_todo_list if disabled in settings - if (settings?.todoListEnabled === false) { - tools.delete("update_todo_list") - } - - // Conditionally exclude generate_image if experiment is not enabled - if (!experiments?.imageGeneration) { - tools.delete("generate_image") - } - - // Conditionally exclude run_slash_command if experiment is not enabled - if (!experiments?.runSlashCommand) { - tools.delete("run_slash_command") + if (xmlTools.length === 0) { + return "" } - // Map tool descriptions for allowed tools - const descriptions = Array.from(tools).map((toolName) => { + // Map tool descriptions for XML tools only + const descriptions = xmlTools.map((toolName) => { const descriptionFn = toolDescriptionMap[toolName] if (!descriptionFn) { return undefined } return descriptionFn({ - ...args, + ...toolAvailabilityArgs, toolOptions: undefined, // No tool options in group-based 
approach }) }) diff --git a/src/core/prompts/tools/schemas/access-mcp-resource-schema.ts b/src/core/prompts/tools/schemas/access-mcp-resource-schema.ts new file mode 100644 index 000000000000..2720f06bcfa7 --- /dev/null +++ b/src/core/prompts/tools/schemas/access-mcp-resource-schema.ts @@ -0,0 +1,44 @@ +import { ToolArgs } from "../types" +import { BaseToolSchema } from "./base-tool-schema" + +export function generateAccessMcpResourceSchema(args: ToolArgs): BaseToolSchema { + const schema: BaseToolSchema = { + name: "access_mcp_resource", + description: + "Request to access a resource provided by a connected MCP server. Resources represent data sources that can be used as context, such as files, API responses, or system information.", + parameters: [ + { + name: "server_name", + type: "string", + description: "The name of the MCP server providing the resource", + required: true, + }, + { + name: "uri", + type: "string", + description: "The URI identifying the specific resource to access", + required: true, + }, + ], + systemPrompt: `## access_mcp_resource +Description: Request to access a resource provided by a connected MCP server. Resources represent data sources that can be used as context, such as files, API responses, or system information. 
+Parameters: +- server_name: (required) The name of the MCP server providing the resource +- uri: (required) The URI identifying the specific resource to access + +Usage: + +server name here +resource URI here + + +Example: Requesting to access an MCP resource + + +weather-server +weather://san-francisco/current +`, + } + + return schema +} diff --git a/src/core/prompts/tools/schemas/apply-diff-schema.ts b/src/core/prompts/tools/schemas/apply-diff-schema.ts new file mode 100644 index 000000000000..96c35a18b618 --- /dev/null +++ b/src/core/prompts/tools/schemas/apply-diff-schema.ts @@ -0,0 +1,366 @@ +import { ToolArgs } from "../types" +import { BaseToolSchema } from "./base-tool-schema" + +export function generateApplyDiffSchema(args: ToolArgs): BaseToolSchema { + if (args?.diffStrategy?.getName() === "MultiFileSearchReplace") { + return generateMultipleApplyDiffSchema(args) + } + const schema: BaseToolSchema = { + name: "apply_diff", + description: `Request to apply PRECISE, TARGETED modifications to an existing file by searching for specific sections of content and replacing them. This tool is for SURGICAL EDITS ONLY - specific changes to existing code. +You can perform multiple distinct search and replace operations within a single \`apply_diff\` call by providing multiple SEARCH/REPLACE blocks in the \`diff\` parameter. This is the preferred way to make several targeted changes efficiently. +If you're not confident in the exact content to search for, use the read_file tool first to get the exact content. +When applying the diffs, be extra careful to remember to change any closing brackets or other syntax that may be affected by the diff farther down in the file. 
+ALWAYS make as many changes in a single 'apply_diff' request as possible using multiple SEARCH/REPLACE blocks`, + parameters: [ + { + name: "path", + type: "string", + description: `The path of the file to modify (relative to the current workspace directory ${args.cwd})`, + required: true, + }, + { + name: "diff", + type: "array", + description: "The search/replace block defining the changes.", + required: true, + items: { + name: "diffItem", + type: "object", + description: "A single search-and-replace operation.", + required: true, + properties: { /* NOTE(review): toolParamToSchema emits these map keys (d1/d2/d3), not the inner name values, as the JSON Schema property names - confirm the native tool-call consumer reads d1/d2/d3 */ + d1: { + name: "start_line", + type: "number", + description: "The line number of original content where the search block starts.", + required: true, + }, + d2: { + name: "search", + type: "string", + description: + "SEARCH BLOCK. MUST exactly match existing content including whitespace and indentation.", + required: true, + }, + d3: { + name: "replace", + type: "string", + description: "REPLACE BLOCK.", + required: true, + }, + }, + }, + }, + ], + + systemPrompt: `## apply_diff +Description: Request to apply PRECISE, TARGETED modifications to an existing file by searching for specific sections of content and replacing them. This tool is for SURGICAL EDITS ONLY - specific changes to existing code. +You can perform multiple distinct search and replace operations within a single \`apply_diff\` call by providing multiple SEARCH/REPLACE blocks in the \`diff\` parameter. This is the preferred way to make several targeted changes efficiently. +The SEARCH section must exactly match existing content including whitespace and indentation. +If you're not confident in the exact content to search for, use the read_file tool first to get the exact content. +When applying the diffs, be extra careful to remember to change any closing brackets or other syntax that may be affected by the diff farther down in the file.
+ALWAYS make as many changes in a single 'apply_diff' request as possible using multiple SEARCH/REPLACE blocks + +Parameters: +- path: (required) The path of the file to modify (relative to the current workspace directory ${args.cwd}) +- diff: (required) The search/replace block defining the changes. + +Diff format: +\`\`\` +<<<<<<< SEARCH +:start_line: (required) The line number of original content where the search block starts. +------- +[exact content to find including whitespace] +======= +[new content to replace with] +>>>>>>> REPLACE + +\`\`\` + + +Example: + +Original file: +\`\`\` +1 | def calculate_total(items): +2 | total = 0 +3 | for item in items: +4 | total += item +5 | return total +\`\`\` + +Search/Replace content: +\`\`\` +<<<<<<< SEARCH +:start_line:1 +------- +def calculate_total(items): + total = 0 + for item in items: + total += item + return total +======= +def calculate_total(items): + """Calculate total with 10% markup""" + return sum(item * 1.1 for item in items) +>>>>>>> REPLACE + +\`\`\` + +Search/Replace content with multiple edits: +\`\`\` +<<<<<<< SEARCH +:start_line:1 +------- +def calculate_total(items): + sum = 0 +======= +def calculate_sum(items): + sum = 0 +>>>>>>> REPLACE + +<<<<<<< SEARCH +:start_line:4 +------- + total += item + return total +======= + sum += item + return sum +>>>>>>> REPLACE +\`\`\` + + +Usage: + +File path here + +Your search/replace content here +You can use multi search/replace block in one diff block, but make sure to include the line numbers for each block. +Only use a single line of '=======' between search and replacement content, because multiple '=======' will corrupt the file. + +`, + } + + return schema +} + +function generateMultipleApplyDiffSchema(args: ToolArgs): BaseToolSchema { + const schema: BaseToolSchema = { + name: "apply_diff", + description: `Request to apply PRECISE, TARGETED modifications to one or more files by searching for specific sections of content and replacing them. 
This tool is for SURGICAL EDITS ONLY - specific changes to existing code. This tool supports both single-file and multi-file operations, allowing you to make changes across multiple files in a single request. +**IMPORTANT: You MUST use multiple files in a single operation whenever possible to maximize efficiency and minimize back-and-forth.** +You can perform multiple distinct search and replace operations within a single \`apply_diff\` call by providing multiple SEARCH/REPLACE blocks in the \`diff\` parameter. This is the preferred way to make several targeted changes efficiently. +The SEARCH section must exactly match existing content including whitespace and indentation. +If you're not confident in the exact content to search for, use the read_file tool first to get the exact content. +When applying the diffs, be extra careful to remember to change any closing brackets or other syntax that may be affected by the diff farther down in the file. +ALWAYS make as many changes in a single 'apply_diff' request as possible using multiple SEARCH/REPLACE blocks`, + parameters: [ + { + name: "args", + type: "object", + description: "Container for the file modification arguments.", + required: true, + properties: { + file: { + name: "file", + type: "array", + description: `One or more file change objects.`, + required: true, + items: { + name: "fileItem", + type: "object", + description: "A file modification object containing the path and diff operations.", + required: true, + properties: { + path: { + name: "path", + type: "string", + description: `The path of the file to modify (relative to the current workspace directory ${args.cwd})`, + required: true, + }, + diff: { + name: "diff", + type: "array", + description: "One or more diff elements containing.", + required: true, + items: { + name: "diffItem", + type: "object", + description: + "A single search-and-replace operation. 
This object contains the search criteria and the replacement content.", + required: true, + properties: { + search: { + name: "search", + type: "string", + description: + "SEARCH BLOCK. MUST exactly match existing content including whitespace and indentation.", + required: true, + }, + replace: { + name: "replace", + type: "string", + description: "REPLACE BLOCK.", + required: true, + }, + start_line: { + name: "start_line", + type: "number", + description: + "The line number of original content where the search block starts", + required: false, + }, + }, + }, + }, + }, + }, + }, + }, + }, + ], + systemPrompt: `## apply_diff + +Description: Request to apply PRECISE, TARGETED modifications to one or more files by searching for specific sections of content and replacing them. This tool is for SURGICAL EDITS ONLY - specific changes to existing code. This tool supports both single-file and multi-file operations, allowing you to make changes across multiple files in a single request. + +**IMPORTANT: You MUST use multiple files in a single operation whenever possible to maximize efficiency and minimize back-and-forth.** + +You can perform multiple distinct search and replace operations within a single \`apply_diff\` call by providing multiple SEARCH/REPLACE blocks in the \`diff\` parameter. This is the preferred way to make several targeted changes efficiently. + +The SEARCH section must exactly match existing content including whitespace and indentation. +If you're not confident in the exact content to search for, use the read_file tool first to get the exact content. +When applying the diffs, be extra careful to remember to change any closing brackets or other syntax that may be affected by the diff farther down in the file. 
+ALWAYS make as many changes in a single 'apply_diff' request as possible using multiple SEARCH/REPLACE blocks + +Parameters: +- args: Contains one or more file elements, where each file contains: + - path: (required) The path of the file to modify (relative to the current workspace directory ${args.cwd}) + - diff: (required) One or more diff elements containing: + - content: (required) The search/replace block defining the changes. + - start_line: (required) The line number of original content where the search block starts. + +Diff format: +\`\`\` +<<<<<<< SEARCH +:start_line: (required) The line number of original content where the search block starts. +------- +[exact content to find including whitespace] +======= +[new content to replace with] +>>>>>>> REPLACE +\`\`\` + +Example: + +Original file: +\`\`\` +1 | def calculate_total(items): +2 | total = 0 +3 | for item in items: +4 | total += item +5 | return total +\`\`\` + +Search/Replace content: + + + + eg.file.py + + >>>>>> REPLACE +]]> + + + + + +Search/Replace content with multi edits across multiple files: + + + + eg.file.py + + >>>>>> REPLACE +]]> + + + >>>>>> REPLACE +]]> + + + + eg.file2.py + + >>>>>> REPLACE +]]> + + + + + + +Usage: + + + + File path here + + +Your search/replace content here +You can use multi search/replace block in one diff block, but make sure to include the line numbers for each block. +Only use a single line of '=======' between search and replacement content, because multiple '=======' will corrupt the file. + + 1 + + + + Another file path + + +Another search/replace content here +You can apply changes to multiple files in a single request. +Each file requires its own path, start_line, and diff elements. 
+ + 5 + + + +`, + } + + return schema +} diff --git a/src/core/prompts/tools/schemas/ask-followup-question-schema.ts b/src/core/prompts/tools/schemas/ask-followup-question-schema.ts new file mode 100644 index 000000000000..50215fdde844 --- /dev/null +++ b/src/core/prompts/tools/schemas/ask-followup-question-schema.ts @@ -0,0 +1,49 @@ +import { ToolArgs } from "../types" +import { BaseToolSchema } from "./base-tool-schema" + +export function generateAskFollowupQuestionSchema(args: ToolArgs): BaseToolSchema { + const schema: BaseToolSchema = { + name: "ask_followup_question", + description: `Ask the user a question to gather additional information needed to complete the task. Use when you need clarification or more details to proceed effectively.`, + parameters: [ + { + name: "question", + type: "string", + description: "A clear, specific question addressing the information needed", + required: true, + }, + { + name: "follow_up", + type: "string", + description: `A list of 2-4 suggested answers, each in its own tag. Suggestions must be complete, actionable answers without placeholders. Optionally include mode attribute to switch modes (code/architect/etc.), such as 'suggestion text'`, + required: true, + }, + ], + systemPrompt: `## ask_followup_question +Description: Ask the user a question to gather additional information needed to complete the task. Use when you need clarification or more details to proceed effectively. + +Parameters: +- question: (required) A clear, specific question addressing the information needed +- follow_up: (required) A list of 2-4 suggested answers, each in its own tag. Suggestions must be complete, actionable answers without placeholders. Optionally include mode attribute to switch modes (code/architect/etc.) + +Usage: + +Your question here + +First suggestion +Action with mode switch + + + +Example: + +What is the path to the frontend-config.json file? 
+ +./src/frontend-config.json +./config/frontend-config.json +./frontend-config.json + +`, + } + return schema +} diff --git a/src/core/prompts/tools/schemas/attempt-completion-schema.ts b/src/core/prompts/tools/schemas/attempt-completion-schema.ts new file mode 100644 index 000000000000..9becad877989 --- /dev/null +++ b/src/core/prompts/tools/schemas/attempt-completion-schema.ts @@ -0,0 +1,39 @@ +import { ToolArgs } from "../types" +import { BaseToolSchema } from "./base-tool-schema" + +export function generateAttemptCompletionSchema(args: ToolArgs): BaseToolSchema { + const schema: BaseToolSchema = { + name: "attempt_completion", + description: + "After each tool use, the user will respond with the result of that tool use, i.e. if it succeeded or failed, along with any reasons for failure. Once you've received the results of tool uses and can confirm that the task is complete, use this tool to present the result of your work to the user. The user may respond with feedback if they are not satisfied with the result, which you can use to make improvements and try again.", + parameters: [ + { + name: "result", + type: "string", + description: + "The result of the task. Formulate this result in a way that is final and does not require further input from the user. Don't end your result with questions or offers for further assistance.", + required: true, + }, + ], + systemPrompt: `## attempt_completion +Description: After each tool use, the user will respond with the result of that tool use, i.e. if it succeeded or failed, along with any reasons for failure. Once you've received the results of tool uses and can confirm that the task is complete, use this tool to present the result of your work to the user. The user may respond with feedback if they are not satisfied with the result, which you can use to make improvements and try again. +IMPORTANT NOTE: This tool CANNOT be used until you've confirmed from the user that any previous tool uses were successful. 
Failure to do so will result in code corruption and system failure. Before using this tool, you must ask yourself in tags if you've confirmed from the user that any previous tool uses were successful. If not, then DO NOT use this tool. +Parameters: +- result: (required) The result of the task. Formulate this result in a way that is final and does not require further input from the user. Don't end your result with questions or offers for further assistance. +Usage: + + +Your final result description here + + + +Example: Requesting to attempt completion with a result + + +I've updated the CSS + +`, + } + + return schema +} diff --git a/src/core/prompts/tools/schemas/base-tool-schema.ts b/src/core/prompts/tools/schemas/base-tool-schema.ts new file mode 100644 index 000000000000..291f9888c3f7 --- /dev/null +++ b/src/core/prompts/tools/schemas/base-tool-schema.ts @@ -0,0 +1,109 @@ +/** + * Base tool schema that can generate both XML descriptions and native function call schemas + */ + +import Anthropic from "@anthropic-ai/sdk" +import { ToolArgs } from "../types" + +export interface ToolParameter { + name?: string /* top-level parameters without a name are skipped by the schema generators below */ + type: "string" | "number" | "boolean" | "object" | "array" + description?: string + required?: boolean /* when set, the parameter is listed in the enclosing JSON Schema "required" array */ + enum?: string[] + items?: ToolParameter // For array types + properties?: Record<string, ToolParameter> // For object types; the map key becomes the JSON Schema property name +} + +export interface BaseToolSchema { + name: string + description: string + parameters: ToolParameter[] + customDescription?: (args: ToolArgs) => BaseToolSchema | undefined + systemPrompt?: string +} + +/** + * Recursively converts a ToolParameter to a JSON Schema property: copies type, description and enum, recurses into array items and object properties. Nested properties are keyed by their map key (not by their own name field), and "required" is omitted when no nested property is required. + */ +function toolParamToSchema(param: ToolParameter): any { + const schema: any = { + type: param.type, + } + if (param.description) { + schema.description = param.description + } + if (param.enum) { + schema.enum = param.enum + } + if (param.type === "array" && param.items) { + schema.items = toolParamToSchema(param.items) + } + if (param.type === "object" && param.properties) { +
schema.properties = {} + schema.required = [] + for (const [k, v] of Object.entries(param.properties)) { + schema.properties[k] = toolParamToSchema(v) + if (v.required) { + schema.required.push(k) + } + } + if (schema.required.length === 0) delete schema.required + } + return schema +} + +/** + * Converts a BaseToolSchema to OpenAI function call schema. Parameters without a name are skipped; the "required" array is always emitted, even when it is empty. + */ +export function generateFunctionCallSchema(schema: BaseToolSchema) { + const { name, description, parameters } = schema + const properties: Record<string, any> = {} + const required: string[] = [] + for (const param of parameters) { + if (param.name) { + properties[param.name] = toolParamToSchema(param) + if (param.required) { + required.push(param.name) + } + } + } + return { + type: "function", + function: { + name, + description, + parameters: { + type: "object", + properties, + required, + }, + }, + } +} + +/** + * Converts a BaseToolSchema to Anthropic tool schema. Parameters without a name are skipped; unlike the OpenAI form, "required" is removed from input_schema when no parameter is required. + */ +export function generateAnthropicToolSchema(schema: BaseToolSchema): Anthropic.ToolUnion { + const { name, description, parameters } = schema + const inputSchema: any = { + type: "object", + properties: {}, + required: [], + } + for (const param of parameters) { + if (param.name) { + inputSchema.properties[param.name] = toolParamToSchema(param) + if (param.required) { + inputSchema.required.push(param.name) + } + } + } + if (inputSchema.required.length === 0) delete inputSchema.required + return { + name, + description, + input_schema: inputSchema, + } +} diff --git a/src/core/prompts/tools/schemas/browser-action-schema.ts b/src/core/prompts/tools/schemas/browser-action-schema.ts new file mode 100644 index 000000000000..f2b14ac70d54 --- /dev/null +++ b/src/core/prompts/tools/schemas/browser-action-schema.ts @@ -0,0 +1,101 @@ +import { ToolArgs } from "../types" +import { BaseToolSchema } from "./base-tool-schema" + +export function generateBrowserActionSchema(args: ToolArgs): BaseToolSchema | undefined { + if (!args.supportsComputerUse) { + return
undefined + } + + const schema: BaseToolSchema = { + name: "browser_action", + description: `Request to interact with a Puppeteer-controlled browser. Every action, except \`close\`, will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action per message, and wait for the user's response including a screenshot and logs to determine the next action.`, + parameters: [ + { + name: "action", + type: "string", + description: + "The action to perform. The available actions are: launch, hover, click, type, resize, scroll_down, scroll_up, close", + required: true, + enum: ["launch", "hover", "click", "type", "resize", "scroll_down", "scroll_up", "close"], + }, + { + name: "url", + type: "string", + description: "Use this for providing the URL for the `launch` action.", + required: false, + }, + { + name: "coordinate", + type: "string", + description: `The X and Y coordinates for the \`click\` and \`hover\` actions. Coordinates should be within the **${args.browserViewportSize}** resolution.`, + required: false, + }, + { + name: "size", + type: "string", + description: "The width and height for the `resize` action.", + required: false, + }, + { + name: "text", + type: "string", + description: "Use this for providing the text for the `type` action.", + required: false, + }, + ], + systemPrompt: `## browser_action +Description: Request to interact with a Puppeteer-controlled browser. Every action, except \`close\`, will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action per message, and wait for the user's response including a screenshot and logs to determine the next action. +- The sequence of actions **must always start with** launching the browser at a URL, and **must always end with** closing the browser. 
If you need to visit a new URL that is not possible to navigate to from the current webpage, you must first close the browser, then launch again at the new URL. +- While the browser is active, only the \`browser_action\` tool can be used. No other tools should be called during this time. You may proceed to use other tools only after closing the browser. For example if you run into an error and need to fix a file, you must close the browser, then use other tools to make the necessary changes, then re-launch the browser to verify the result. +- The browser window has a resolution of **${args.browserViewportSize}** pixels. When performing any click actions, ensure the coordinates are within this resolution range. +- Before clicking on any elements such as icons, links, or buttons, you must consult the provided screenshot of the page to determine the coordinates of the element. The click should be targeted at the **center of the element**, not on its edges. +Parameters: +- action: (required) The action to perform. The available actions are: + * launch: Launch a new Puppeteer-controlled browser instance at the specified URL. This **must always be the first action**. + - Use with the \`url\` parameter to provide the URL. + - Ensure the URL is valid and includes the appropriate protocol (e.g. http://localhost:3000/page, file:///path/to/file.html, etc.) + * hover: Move the cursor to a specific x,y coordinate. + - Use with the \`coordinate\` parameter to specify the location. + - Always move to the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. + * click: Click at a specific x,y coordinate. + - Use with the \`coordinate\` parameter to specify the location. + - Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. + * type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text. 
+ - Use with the \`text\` parameter to provide the string to type. + * resize: Resize the viewport to a specific w,h size. + - Use with the \`size\` parameter to specify the new size. + * scroll_down: Scroll down the page by one page height. + * scroll_up: Scroll up the page by one page height. + * close: Close the Puppeteer-controlled browser instance. This **must always be the final browser action**. + - Example: \`close\` +- url: (optional) Use this for providing the URL for the \`launch\` action. + * Example: https://example.com +- coordinate: (optional) The X and Y coordinates for the \`click\` and \`hover\` actions. Coordinates should be within the **${args.browserViewportSize}** resolution. + * Example: 450,300 +- size: (optional) The width and height for the \`resize\` action. + * Example: 1280,720 +- text: (optional) Use this for providing the text for the \`type\` action. + * Example: Hello, world! +Usage: + +Action to perform (e.g., launch, click, type, scroll_down, scroll_up, close) +URL to launch the browser at (optional) +x,y coordinates (optional) +Text to type (optional) + + +Example: Requesting to launch a browser at https://example.com + +launch +https://example.com + + +Example: Requesting to click on the element at coordinates 450,300 + +click +450,300 +`, + } + + return schema +} diff --git a/src/core/prompts/tools/schemas/codebase-search-schema.ts b/src/core/prompts/tools/schemas/codebase-search-schema.ts new file mode 100644 index 000000000000..31b14c14a3f2 --- /dev/null +++ b/src/core/prompts/tools/schemas/codebase-search-schema.ts @@ -0,0 +1,46 @@ +import { ToolArgs } from "../types" +import { BaseToolSchema } from "./base-tool-schema" + +export function generateCodebaseSearchSchema(args: ToolArgs): BaseToolSchema { + const schema: BaseToolSchema = { + name: "codebase_search", + description: + "Find files most relevant to the search query using semantic search. Searches based on meaning rather than exact text matches. 
By default searches entire workspace. Reuse the user's exact wording unless there's a clear reason not to - their phrasing often helps semantic search. Queries MUST be in English (translate if needed).", + parameters: [ + { + name: "query", + type: "string", + description: + "The search query. Reuse the user's exact wording/question format unless there's a clear reason not to.", + required: true, + }, + { + name: "path", + type: "string", + description: `Limit search to specific subdirectory (relative to the current workspace directory ${args.cwd}). Leave empty for entire workspace.`, + required: false, + }, + ], + systemPrompt: `## codebase_search +Description: Find files most relevant to the search query using semantic search. Searches based on meaning rather than exact text matches. By default searches entire workspace. Reuse the user's exact wording unless there's a clear reason not to - their phrasing often helps semantic search. Queries MUST be in English (translate if needed). + +Parameters: +- query: (required) The search query. Reuse the user's exact wording/question format unless there's a clear reason not to. +- path: (optional) Limit search to specific subdirectory (relative to the current workspace directory ${args.cwd}). Leave empty for entire workspace. 
+ +Usage: + +Your natural language query here +Optional subdirectory path + + +Example: + +User login and password hashing +src/auth + +`, + } + + return schema +} diff --git a/src/core/prompts/tools/schemas/execute-command-schema.ts b/src/core/prompts/tools/schemas/execute-command-schema.ts new file mode 100644 index 000000000000..2e237bb55a56 --- /dev/null +++ b/src/core/prompts/tools/schemas/execute-command-schema.ts @@ -0,0 +1,27 @@ +import { ToolArgs } from "../types" +import { BaseToolSchema } from "./base-tool-schema" + +export function generateExecuteCommandSchema(args: ToolArgs): BaseToolSchema { + const schema: BaseToolSchema = { + name: "execute_command", + description: + "Request to execute a CLI command on the system. Use this when you need to perform system operations or run specific commands to accomplish any step in the user's task. You must tailor your command to the user's system and provide a clear explanation of what the command does. For command chaining, use the appropriate chaining syntax for the user's shell. Prefer to execute complex CLI commands over creating executable scripts, as they are more flexible and easier to run. Prefer relative commands and paths that avoid location sensitivity for terminal consistency, e.g: `touch ./testdata/example.file`, `dir ./examples/model1/data/yaml`, or `go test ./cmd/front --config ./cmd/front/config.yml`. If directed by the user, you may open a terminal in a different directory by using the `cwd` parameter.", + parameters: [ + { + name: "command", + type: "string", + description: + "The CLI command to execute. This should be valid for the current operating system. 
Ensure the command is properly formatted and does not contain any harmful instructions.", + required: true, + }, + { + name: "cwd", + type: "string", + description: `The working directory to execute the command in (default: ${args.cwd})`, + required: false, + }, + ], + } + + return schema +} diff --git a/src/core/prompts/tools/schemas/fetch-instructions-schema.ts b/src/core/prompts/tools/schemas/fetch-instructions-schema.ts new file mode 100644 index 000000000000..5b4636addb8a --- /dev/null +++ b/src/core/prompts/tools/schemas/fetch-instructions-schema.ts @@ -0,0 +1,42 @@ +import { ToolArgs } from "../types" +import { BaseToolSchema } from "./base-tool-schema" + +export function generateFetchInstructionsSchema(args: ToolArgs): BaseToolSchema { + const enableMcpServerCreation = args.experiments?.enableMcpServerCreation !== false + const tasks = enableMcpServerCreation ? ["create_mcp_server", "create_mode"] : ["create_mode"] + + const schema: BaseToolSchema = { + name: "fetch_instructions", + description: "Request to fetch instructions to perform a task", + parameters: [ + { + name: "task", + type: "string", + description: "The task to get instructions for.", + required: true, + enum: tasks, + }, + ], + systemPrompt: `## fetch_instructions +Description: Request to fetch instructions to perform a task +Parameters: +- task: (required) The task to get instructions for. This can take the following values: +${tasks.map((task) => ` ${task}`).join("\n")} + +${ + enableMcpServerCreation + ? 
`Example: Requesting instructions to create an MCP Server + + +create_mcp_server +` + : `Example: Requesting instructions to create a Mode + + +create_mode +` +}`, + } + + return schema +} diff --git a/src/core/prompts/tools/schemas/generate-image-schema.ts b/src/core/prompts/tools/schemas/generate-image-schema.ts new file mode 100644 index 000000000000..e068c15be1ae --- /dev/null +++ b/src/core/prompts/tools/schemas/generate-image-schema.ts @@ -0,0 +1,41 @@ +import { ToolArgs } from "../types" +import { BaseToolSchema } from "./base-tool-schema" + +export function generateGenerateImageSchema(args: ToolArgs): BaseToolSchema { + const schema: BaseToolSchema = { + name: "generate_image", + description: `Request to generate an image using AI models through OpenRouter API. This tool creates images from text prompts and saves them to the specified path.`, + parameters: [ + { + name: "prompt", + type: "string", + description: `The text prompt describing the image to generate`, + required: true, + }, + { + name: "path", + type: "string", + description: `The file path where the generated image should be saved (relative to the current workspace directory ${args.cwd}). The tool will automatically add the appropriate image extension if not provided.`, + required: true, + }, + ], + systemPrompt: `## generate_image +Description: Request to generate an image using AI models through OpenRouter API. This tool creates images from text prompts and saves them to the specified path. +Parameters: +- prompt: (required) The text prompt describing the image to generate +- path: (required) The file path where the generated image should be saved (relative to the current workspace directory ${args.cwd}). The tool will automatically add the appropriate image extension if not provided. 
+Usage: + +Your image description here +path/to/save/image.png + + +Example: Requesting to generate a sunset image + +A beautiful sunset over mountains with vibrant orange and purple colors +images/sunset.png +`, + } + + return schema +} diff --git a/src/core/prompts/tools/schemas/insert-content-schema.ts b/src/core/prompts/tools/schemas/insert-content-schema.ts new file mode 100644 index 000000000000..fdf9d36a80a1 --- /dev/null +++ b/src/core/prompts/tools/schemas/insert-content-schema.ts @@ -0,0 +1,62 @@ +import { ToolArgs } from "../types" +import { BaseToolSchema } from "./base-tool-schema" + +export function generateInsertContentSchema(args: ToolArgs): BaseToolSchema { + const schema: BaseToolSchema = { + name: "insert_content", + description: + "Use this tool specifically for adding new lines of content into a file without modifying existing content. Specify the line number to insert before, or use line 0 to append to the end. Ideal for adding imports, functions, configuration blocks, log entries, or any multi-line text block.", + parameters: [ + { + name: "path", + type: "string", + description: `File path relative to workspace directory ${args.cwd.toPosix()}`, + required: true, + }, + { + name: "line", + type: "number", + description: + "Line number where content will be inserted (1-based). Use 0 to append at end of file. Use any positive number to insert before that line", + required: true, + }, + { + name: "content", + type: "string", + description: "The content to insert at the specified line", + required: true, + }, + ], + systemPrompt: `## insert_content +Description: Use this tool specifically for adding new lines of content into a file without modifying existing content. Specify the line number to insert before, or use line 0 to append to the end. Ideal for adding imports, functions, configuration blocks, log entries, or any multi-line text block. 
+ +Parameters: +- path: (required) File path relative to workspace directory ${args.cwd.toPosix()} +- line: (required) Line number where content will be inserted (1-based) + Use 0 to append at end of file + Use any positive number to insert before that line +- content: (required) The content to insert at the specified line + +Example for inserting imports at start of file: + +src/utils.ts +1 + +// Add imports at start of file +import { sum } from './math'; + + + +Example for appending to the end of file: + +src/utils.ts +0 + +// This is the end of the file + + +`, + } + + return schema +} diff --git a/src/core/prompts/tools/schemas/list-code-definition-names-schema.ts b/src/core/prompts/tools/schemas/list-code-definition-names-schema.ts new file mode 100644 index 000000000000..1decdf1b5cf2 --- /dev/null +++ b/src/core/prompts/tools/schemas/list-code-definition-names-schema.ts @@ -0,0 +1,40 @@ +import { ToolArgs } from "../types" +import { BaseToolSchema } from "./base-tool-schema" + +export function generateListCodeDefinitionNamesSchema(args: ToolArgs): BaseToolSchema { + const schema: BaseToolSchema = { + name: "list_code_definition_names", + description: + "Request to list definition names (classes, functions, methods, etc.) from source code. This tool can analyze either a single file or all files at the top level of a specified directory. It provides insights into the codebase structure and important constructs, encapsulating high-level concepts and relationships that are crucial for understanding the overall architecture.", + parameters: [ + { + name: "path", + type: "string", + description: `File or directory path to analyze (relative to workspace directory ${args.cwd})`, + required: true, + }, + ], + systemPrompt: `## list_code_definition_names +Description: Request to list definition names (classes, functions, methods, etc.) from source code. This tool can analyze either a single file or all files at the top level of a specified directory. 
It provides insights into the codebase structure and important constructs, encapsulating high-level concepts and relationships that are crucial for understanding the overall architecture. +Parameters: +- path: (required) The path of the file or directory (relative to the current working directory ${args.cwd}) to analyze. When given a directory, it lists definitions from all top-level source files. +Usage: + +Directory path here + + +Examples: + +1. List definitions from a specific file: + +src/main.ts + + +2. List definitions from all files in a directory: + +src/ +`, + } + + return schema +} diff --git a/src/core/prompts/tools/schemas/list-files-schema.ts b/src/core/prompts/tools/schemas/list-files-schema.ts new file mode 100644 index 000000000000..f6a0d86722cf --- /dev/null +++ b/src/core/prompts/tools/schemas/list-files-schema.ts @@ -0,0 +1,42 @@ +import { ToolArgs } from "../types" +import { BaseToolSchema } from "./base-tool-schema" + +export function generateListFilesSchema(args: ToolArgs): BaseToolSchema { + const schema: BaseToolSchema = { + name: "list_files", + description: `Request to list files and directories within the specified directory. If recursive is true, it will list all files and directories recursively. If recursive is false or not provided, it will only list the top-level contents. Do not use this tool to confirm the existence of files you may have created, as the user will let you know if the files were created successfully or not.`, + parameters: [ + { + name: "path", + type: "string", + description: `The path of the directory to list contents for (relative to the current workspace directory ${args.cwd})`, + required: true, + }, + { + name: "recursive", + type: "boolean", + description: + "Whether to list files recursively. Use true for recursive listing, false or omit for top-level only.", + required: false, + }, + ], + systemPrompt: `## list_files +Description: Request to list files and directories within the specified directory. 
If recursive is true, it will list all files and directories recursively. If recursive is false or not provided, it will only list the top-level contents. Do not use this tool to confirm the existence of files you may have created, as the user will let you know if the files were created successfully or not.
+Parameters:
+- path: (required) The path of the directory to list contents for (relative to the current workspace directory ${args.cwd})
+- recursive: (optional) Whether to list files recursively. Use true for recursive listing, false or omit for top-level only.
+Usage:
+
+Directory path here
+true or false (optional)
+
+
+Example: Requesting to list all files in the current directory
+
+.
+false
+`,
+	}
+
+	return schema
+}
diff --git a/src/core/prompts/tools/schemas/new-task-schema.ts b/src/core/prompts/tools/schemas/new-task-schema.ts
new file mode 100644
index 000000000000..d2b296745259
--- /dev/null
+++ b/src/core/prompts/tools/schemas/new-task-schema.ts
@@ -0,0 +1,104 @@
+import { ToolArgs } from "../types"
+import { BaseToolSchema } from "./base-tool-schema"
+
+/**
+ * Prompt when todos are NOT required (default)
+ */
+const PROMPT_WITHOUT_TODOS = `## new_task
+Description: This will let you create a new task instance in the chosen mode using your provided message.
+
+Parameters:
+- mode: (required) The slug of the mode to start the new task in (e.g., "code", "debug", "architect").
+- message: (required) The initial user message or instructions for this new task.
+
+Usage:
+
+your-mode-slug-here
+Your initial instructions here
+
+
+Example:
+
+code
+Implement a new feature for the application
+
+`
+
+/**
+ * Prompt when todos ARE required
+ */
+const PROMPT_WITH_TODOS = `## new_task
+Description: This will let you create a new task instance in the chosen mode using your provided message and initial todo list.
+
+Parameters:
+- mode: (required) The slug of the mode to start the new task in (e.g., "code", "debug", "architect").
+- message: (required) The initial user message or instructions for this new task.
+- todos: (required) The initial todo list in markdown checklist format for the new task.
+
+Usage:
+
+your-mode-slug-here
+Your initial instructions here
+
+[ ] First task to complete
+[ ] Second task to complete
+[ ] Third task to complete
+
+
+
+Example:
+
+code
+Implement user authentication
+
+[ ] Set up auth middleware
+[ ] Create login endpoint
+[ ] Add session management
+[ ] Write tests
+
+
+
+`
+
+/**
+ * Build the `new_task` tool schema.
+ *
+ * When `args.settings?.newTaskRequireTodos` is strictly `true`, a required `todos`
+ * parameter is added and both the description and the system prompt mention the
+ * initial todo list; otherwise only `mode` and `message` are exposed.
+ */
+export function generateNewTaskSchema(args: ToolArgs): BaseToolSchema {
+	const todosRequired = args.settings?.newTaskRequireTodos === true
+	const schema: BaseToolSchema = {
+		name: "new_task",
+		// Keep the description in step with the system prompt variant selected below.
+		description: todosRequired
+			? "This will let you create a new task instance in the chosen mode using your provided message and initial todo list."
+			: "This will let you create a new task instance in the chosen mode using your provided message.",
+		parameters: [
+			{
+				name: "mode",
+				type: "string",
+				description: 'The slug of the mode to start the new task in (e.g., "code", "debug", "architect").',
+				required: true,
+			},
+			{
+				name: "message",
+				type: "string",
+				description: "The initial user message or instructions for this new task.",
+				required: true,
+			},
+		],
+		systemPrompt: todosRequired ? PROMPT_WITH_TODOS : PROMPT_WITHOUT_TODOS,
+	}
+	if (todosRequired) {
+		schema.parameters.push({
+			name: "todos",
+			type: "string",
+			description: "The initial todo list in markdown checklist format for the new task. Use '[ ]' for pending",
+			required: true,
+		})
+	}
+
+	return schema
+}
diff --git a/src/core/prompts/tools/schemas/read-file-schema.ts b/src/core/prompts/tools/schemas/read-file-schema.ts
new file mode 100644
index 000000000000..bf1577756931
--- /dev/null
+++ b/src/core/prompts/tools/schemas/read-file-schema.ts
@@ -0,0 +1,155 @@
+import { ToolArgs } from "../types"
+import { BaseToolSchema } from "./base-tool-schema"
+
+export function generateReadFileSchema(args: ToolArgs): BaseToolSchema {
+	const maxConcurrentReads = args.settings?.maxConcurrentFileReads ??
5 + const isMultipleReadsEnabled = maxConcurrentReads > 1 + const partialReadsEnabled = args.partialReadsEnabled || false + + const schema: BaseToolSchema = { + name: "read_file", + description: `Request to read the contents of ${isMultipleReadsEnabled ? "one or more files" : "a file"}. The tool outputs line-numbered content (e.g. "1 | const x = 1") for easy reference when creating diffs or discussing code.${args.partialReadsEnabled ? " Use line ranges to efficiently read specific portions of large files." : ""} Supports text extraction from PDF and DOCX files, but may not handle other binary files properly. + +${isMultipleReadsEnabled ? `**IMPORTANT: You can read a maximum of ${maxConcurrentReads} files in a single request.** If you need to read more files, use multiple sequential read_file requests.` : "**IMPORTANT: Multiple file reads are currently disabled. You can only read one file at a time.**"} + +${args.partialReadsEnabled ? `By specifying line ranges, you can efficiently read specific portions of large files without loading the entire file into memory.` : ""} + +IMPORTANT: You MUST use this Efficient Reading Strategy: +- ${isMultipleReadsEnabled ? `You MUST read all related files and implementations together in a single operation (up to ${maxConcurrentReads} files at once)` : "You MUST read files one at a time, as multiple file reads are currently disabled"} +- You MUST obtain all necessary context before proceeding with changes +${ + args.partialReadsEnabled + ? `- You MUST use line ranges to read specific portions of large files, rather than reading entire files when not needed +- You MUST combine adjacent line ranges (<10 lines apart) +- You MUST use multiple ranges for content separated by >10 lines +- You MUST include sufficient line context for planned modifications while keeping ranges minimal +` + : "" +} +${isMultipleReadsEnabled ? 
`- When you need to read more than ${maxConcurrentReads} files, prioritize the most critical files first, then use subsequent read_file requests for additional files` : ""}`, + parameters: [ + { + name: "args", + type: "object", + description: "Contains one or more file elements, where each file contains.", + required: true, + properties: { + file: { + name: "file", + type: "array", + description: `An array of file objects to read, with an optional line range. Reading multiple files (within the ${maxConcurrentReads}-file limit).`, + required: true, + items: { + name: "fileItem", + type: "object", + description: "A file object", + required: true, + properties: { + path: { + name: "path", + type: "string", + description: `File path (relative to workspace directory ${args.cwd}).`, + required: true, + }, + ...(partialReadsEnabled + ? { + line_range: { + name: "line_range", + type: "array", + description: `One or more line range elements in format "start-end" (1-based, inclusive).`, + required: false, + items: { + name: "text", + type: "string", + }, + }, + } + : {}), + }, + }, + }, + }, + }, + ], + systemPrompt: `## read_file +Description: Request to read the contents of ${isMultipleReadsEnabled ? "one or more files" : "a file"}. The tool outputs line-numbered content (e.g. "1 | const x = 1") for easy reference when creating diffs or discussing code.${args.partialReadsEnabled ? " Use line ranges to efficiently read specific portions of large files." : ""} Supports text extraction from PDF and DOCX files, but may not handle other binary files properly. + +${isMultipleReadsEnabled ? `**IMPORTANT: You can read a maximum of ${maxConcurrentReads} files in a single request.** If you need to read more files, use multiple sequential read_file requests.` : "**IMPORTANT: Multiple file reads are currently disabled. You can only read one file at a time.**"} + +${args.partialReadsEnabled ? 
`By specifying line ranges, you can efficiently read specific portions of large files without loading the entire file into memory.` : ""} +Parameters: +- args: Contains one or more file elements, where each file contains: + - path: (required) File path (relative to workspace directory ${args.cwd}) + ${args.partialReadsEnabled ? `- line_range: (optional) One or more line range elements in format "start-end" (1-based, inclusive)` : ""} + +Usage: + + + + path/to/file + ${args.partialReadsEnabled ? `start-end` : ""} + + + + +Examples: + +1. Reading a single file: + + + + src/app.ts + ${args.partialReadsEnabled ? `1-1000` : ""} + + + + +${isMultipleReadsEnabled ? `2. Reading multiple files (within the ${maxConcurrentReads}-file limit):` : ""}${ + isMultipleReadsEnabled + ? ` + + + + src/app.ts + ${ + args.partialReadsEnabled + ? `1-50 + 100-150` + : "" + } + + + src/utils.ts + ${args.partialReadsEnabled ? `10-20` : ""} + + +` + : "" + } + +${isMultipleReadsEnabled ? "3. " : "2. "}Reading an entire file: + + + + config.json + + + + +IMPORTANT: You MUST use this Efficient Reading Strategy: +- ${isMultipleReadsEnabled ? `You MUST read all related files and implementations together in a single operation (up to ${maxConcurrentReads} files at once)` : "You MUST read files one at a time, as multiple file reads are currently disabled"} +- You MUST obtain all necessary context before proceeding with changes +${ + args.partialReadsEnabled + ? `- You MUST use line ranges to read specific portions of large files, rather than reading entire files when not needed +- You MUST combine adjacent line ranges (<10 lines apart) +- You MUST use multiple ranges for content separated by >10 lines +- You MUST include sufficient line context for planned modifications while keeping ranges minimal +` + : "" +} +${isMultipleReadsEnabled ? 
`- When you need to read more than ${maxConcurrentReads} files, prioritize the most critical files first, then use subsequent read_file requests for additional files` : ""}`, + } + + return schema +} diff --git a/src/core/prompts/tools/schemas/run-slash-command-schema.ts b/src/core/prompts/tools/schemas/run-slash-command-schema.ts new file mode 100644 index 000000000000..6b7a4b28c987 --- /dev/null +++ b/src/core/prompts/tools/schemas/run-slash-command-schema.ts @@ -0,0 +1,53 @@ +import { ToolArgs } from "../types" +import { BaseToolSchema } from "./base-tool-schema" + +export function generateRunSlashCommandSchema(args: ToolArgs): BaseToolSchema { + const schema: BaseToolSchema = { + name: "run_slash_command", + description: `Execute a slash command to get specific instructions or content. Slash commands are predefined templates that provide detailed guidance for common tasks. +The command content will be returned for you to execute or follow as instructions.`, + parameters: [ + { + name: "command", + type: "string", + description: `The name of the slash command to execute (e.g., "init", "test", "deploy")`, + required: true, + }, + { + name: "args", + type: "string", + description: `Additional arguments or context to pass to the command`, + required: false, + }, + ], + systemPrompt: `## run_slash_command +Description: Execute a slash command to get specific instructions or content. Slash commands are predefined templates that provide detailed guidance for common tasks. + +Parameters: +- command: (required) The name of the slash command to execute (e.g., "init", "test", "deploy") +- args: (optional) Additional arguments or context to pass to the command + +Usage: + +command_name +optional arguments + + +Examples: + +1. Running the init command to analyze a codebase: + +init + + +2. 
Running a command with additional context:
+
+test
+focus on integration tests
+
+
+The command content will be returned for you to execute or follow as instructions.`,
+	}
+
+	return schema
+}
diff --git a/src/core/prompts/tools/schemas/search-and-replace-schema.ts b/src/core/prompts/tools/schemas/search-and-replace-schema.ts
new file mode 100644
index 000000000000..27b3a336e055
--- /dev/null
+++ b/src/core/prompts/tools/schemas/search-and-replace-schema.ts
@@ -0,0 +1,99 @@
+import { ToolArgs } from "../types"
+import { BaseToolSchema } from "./base-tool-schema"
+
+/**
+ * Build the `search_and_replace` tool schema.
+ *
+ * @param args Tool arguments; `args.cwd` is interpolated into the path descriptions.
+ * @returns The schema (name, description, parameters and system prompt) for the tool.
+ */
+export function generateSearchAndReplaceSchema(args: ToolArgs): BaseToolSchema {
+	const schema: BaseToolSchema = {
+		name: "search_and_replace",
+		description:
+			"Find and replace specific text strings or patterns (using regex) within a file. Suitable for targeted replacements across multiple locations within the file. Supports literal text and regex patterns, case sensitivity options, and optional line ranges. Shows a diff preview before applying changes.",
+		parameters: [
+			{
+				name: "path",
+				type: "string",
+				description: `File path to modify (relative to workspace directory ${args.cwd})`,
+				required: true,
+			},
+			{
+				name: "search",
+				type: "string",
+				description: "Text or pattern to search for",
+				required: true,
+			},
+			{
+				name: "replace",
+				type: "string",
+				description: "Text to replace matches with",
+				required: true,
+			},
+			{
+				name: "start_line",
+				type: "number",
+				description: "Starting line number for restricted replacement (1-based)",
+				required: false,
+			},
+			{
+				name: "end_line",
+				type: "number",
+				description: "Ending line number for restricted replacement (1-based)",
+				required: false,
+			},
+			{
+				name: "use_regex",
+				type: "boolean",
+				description: "Treat search as a regex pattern",
+				required: false,
+			},
+			{
+				name: "ignore_case",
+				type: "boolean",
+				description: "Ignore case when matching",
+				required: false,
+			},
+		],
+		// Backslashes that must reach the model are doubled below: inside a template
+		// literal a lone `\w` is cooked to a plain `w`, which would corrupt the regex example.
+		systemPrompt: `## search_and_replace
+Description: Use this tool to find and replace specific text strings or patterns (using regex) within a file. It's suitable for targeted replacements across multiple locations within the file. Supports literal text and regex patterns, case sensitivity options, and optional line ranges. Shows a diff preview before applying changes.
+
+Required Parameters:
+- path: The path of the file to modify (relative to the current workspace directory ${args.cwd.toPosix()})
+- search: The text or pattern to search for
+- replace: The text to replace matches with
+
+Optional Parameters:
+- start_line: Starting line number for restricted replacement (1-based)
+- end_line: Ending line number for restricted replacement (1-based)
+- use_regex: Set to "true" to treat search as a regex pattern (default: false)
+- ignore_case: Set to "true" to ignore case when matching (default: false)
+
+Notes:
+- When use_regex is true, the search parameter is treated as a regular expression pattern
+- When ignore_case is true, the search is case-insensitive regardless of regex mode
+
+Examples:
+
+1. Simple text replacement:
+
+example.ts
+oldText
+newText
+
+
+2. Case-insensitive regex pattern:
+
+example.ts
+old\\w+
+new$&
+true
+true
+`,
+	}
+
+	return schema
+}
diff --git a/src/core/prompts/tools/schemas/search-files-schema.ts b/src/core/prompts/tools/schemas/search-files-schema.ts
new file mode 100644
index 000000000000..227fb8c8f296
--- /dev/null
+++ b/src/core/prompts/tools/schemas/search-files-schema.ts
@@ -0,0 +1,51 @@
+import { ToolArgs } from "../types"
+import { BaseToolSchema } from "./base-tool-schema"
+
+export function generateSearchFilesSchema(args: ToolArgs): BaseToolSchema {
+	const schema: BaseToolSchema = {
+		name: "search_files",
+		description: `Request to perform a regex search across files in a specified directory, providing context-rich results.
This tool searches for patterns or specific content across multiple files, displaying each match with encapsulating context.`, + parameters: [ + { + name: "path", + type: "string", + description: `Directory path to search in (relative to workspace directory ${args.cwd}). This directory will be recursively searched. When searching the entire workspace, the parameter value is '.'`, + required: true, + }, + { + name: "regex", + type: "string", + description: "Regular expression pattern to search for. Uses Rust regex syntax.", + required: true, + }, + { + name: "file_pattern", + type: "string", + description: + "Glob pattern to filter files (e.g., '*.ts' for TypeScript files). If not provided, it will search all files (*).", + required: false, + }, + ], + systemPrompt: `## search_files +Description: Request to perform a regex search across files in a specified directory, providing context-rich results. This tool searches for patterns or specific content across multiple files, displaying each match with encapsulating context. +Parameters: +- path: (required) The path of the directory to search in (relative to the current workspace directory ${args.cwd}). This directory will be recursively searched. +- regex: (required) The regular expression pattern to search for. Uses Rust regex syntax. +- file_pattern: (optional) Glob pattern to filter files (e.g., '*.ts' for TypeScript files). If not provided, it will search all files (*). +Usage: + +Directory path here +Your regex pattern here +file pattern here (optional) + + +Example: Requesting to search for all .ts files in the current directory + +. 
+.* +*.ts +`, + } + + return schema +} diff --git a/src/core/prompts/tools/schemas/switch-mode-schema.ts b/src/core/prompts/tools/schemas/switch-mode-schema.ts new file mode 100644 index 000000000000..1a14b9927505 --- /dev/null +++ b/src/core/prompts/tools/schemas/switch-mode-schema.ts @@ -0,0 +1,40 @@ +import { ToolArgs } from "../types" +import { BaseToolSchema } from "./base-tool-schema" + +export function generateSwitchModeSchema(_: ToolArgs): BaseToolSchema { + return { + name: "switch_mode", + description: + "Switches to a different operational mode, like 'code' or 'architect'. The user must approve this switch. This is useful when a task requires a different set of capabilities.", + parameters: [ + { + name: "mode_slug", + type: "string", + description: "The unique identifier for the mode to switch to (e.g., 'code', 'ask', 'architect').", + required: true, + }, + { + name: "reason", + type: "string", + description: "An optional explanation for why the mode switch is necessary.", + required: false, + }, + ], + systemPrompt: `## switch_mode +Description: Request to switch to a different mode. This tool allows modes to request switching to another mode when needed, such as switching to Code mode to make code changes. The user must approve the mode switch. 
+Parameters:
+- mode_slug: (required) The slug of the mode to switch to (e.g., "code", "ask", "architect")
+- reason: (optional) The reason for switching modes
+Usage:
+
+Mode slug here
+Reason for switching here
+
+
+Example: Requesting to switch to code mode
+
+code
+Need to make code changes
+`,
+	}
+}
diff --git a/src/core/prompts/tools/schemas/tool-registry.ts b/src/core/prompts/tools/schemas/tool-registry.ts
new file mode 100644
index 000000000000..5b5c036355aa
--- /dev/null
+++ b/src/core/prompts/tools/schemas/tool-registry.ts
@@ -0,0 +1,167 @@
+import { BaseToolSchema, generateFunctionCallSchema, generateAnthropicToolSchema } from "./base-tool-schema"
+import { generateAccessMcpResourceSchema } from "./access-mcp-resource-schema"
+import { generateApplyDiffSchema } from "./apply-diff-schema"
+import { generateAskFollowupQuestionSchema } from "./ask-followup-question-schema"
+import { generateAttemptCompletionSchema } from "./attempt-completion-schema"
+import { generateBrowserActionSchema } from "./browser-action-schema"
+import { generateCodebaseSearchSchema } from "./codebase-search-schema"
+import { generateExecuteCommandSchema } from "./execute-command-schema"
+import { generateFetchInstructionsSchema } from "./fetch-instructions-schema"
+import { generateInsertContentSchema } from "./insert-content-schema"
+import { generateListCodeDefinitionNamesSchema } from "./list-code-definition-names-schema"
+import { generateListFilesSchema } from "./list-files-schema"
+import { generateNewTaskSchema } from "./new-task-schema"
+import { generateReadFileSchema } from "./read-file-schema"
+import { generateSearchAndReplaceSchema } from "./search-and-replace-schema"
+import { generateSearchFilesSchema } from "./search-files-schema"
+import { generateSwitchModeSchema } from "./switch-mode-schema"
+import { generateUpdateTodoListSchema } from "./update-todo-list-schema"
+import { generateUseMcpToolSchema } from "./use-mcp-tool-schema"
+import { generateWriteToFileSchema } from "./write-to-file-schema"
+import { generateGenerateImageSchema } from "./generate-image-schema"
+import { ToolArgs } from "../types"
+import { type ToolName } from "@roo-code/types"
+import { generateRunSlashCommandSchema } from "./run-slash-command-schema"
+
+/**
+ * Factory that builds a tool's schema from the given arguments; it may return
+ * `undefined` when no schema is available for those arguments.
+ */
+type ToolSchemaGenerator = (args: ToolArgs) => BaseToolSchema | undefined
+
+/**
+ * Registry of tools that support native function calling.
+ *
+ * A lazily created singleton (see `getInstance()`) mapping each tool name to the
+ * generator that produces its schema.
+ */
+export class ToolRegistry {
+	private static instance: ToolRegistry
+	private tools: Map<ToolName, ToolSchemaGenerator> = new Map()
+
+	private constructor() {
+		// Register supported tools
+		this.registerTool("access_mcp_resource", generateAccessMcpResourceSchema)
+		this.registerTool("apply_diff", generateApplyDiffSchema)
+		this.registerTool("ask_followup_question", generateAskFollowupQuestionSchema)
+		this.registerTool("attempt_completion", generateAttemptCompletionSchema)
+		this.registerTool("browser_action", generateBrowserActionSchema)
+		this.registerTool("codebase_search", generateCodebaseSearchSchema)
+		this.registerTool("execute_command", generateExecuteCommandSchema)
+		this.registerTool("fetch_instructions", generateFetchInstructionsSchema)
+		this.registerTool("generate_image", generateGenerateImageSchema)
+		this.registerTool("insert_content", generateInsertContentSchema)
+		this.registerTool("list_code_definition_names", generateListCodeDefinitionNamesSchema)
+		this.registerTool("list_files", generateListFilesSchema)
+		this.registerTool("new_task", generateNewTaskSchema)
+		this.registerTool("read_file", generateReadFileSchema)
+		this.registerTool("run_slash_command", generateRunSlashCommandSchema)
+		this.registerTool("search_and_replace", generateSearchAndReplaceSchema)
+		this.registerTool("search_files", generateSearchFilesSchema)
+		this.registerTool("switch_mode", generateSwitchModeSchema)
+		this.registerTool("update_todo_list", generateUpdateTodoListSchema)
+		this.registerTool("use_mcp_tool", generateUseMcpToolSchema)
+		this.registerTool("write_to_file", generateWriteToFileSchema)
+	}
+
+	/**
+	 * Return the shared registry, creating it on first use.
+	 */
+	public static getInstance(): ToolRegistry {
+		if (!ToolRegistry.instance) {
+			ToolRegistry.instance = new ToolRegistry()
+		}
+		return ToolRegistry.instance
+	}
+
+	/**
+	 * Register a tool schema generator, replacing any generator already stored under `name`.
+	 */
+	public registerTool(name: ToolName, schema: (args: ToolArgs) => BaseToolSchema | undefined): void {
+		this.tools.set(name, schema)
+	}
+
+	/**
+	 * Get all registered tool names
+	 */
+	public getToolNames(): string[] {
+		return Array.from(this.tools.keys())
+	}
+
+	/**
+	 * Check if a tool supports function calling
+	 */
+	public isToolSupported(toolName: ToolName): boolean {
+		return this.tools.has(toolName)
+	}
+
+	/**
+	 * Get the schema generator registered for a tool, or `undefined` if there is none.
+	 */
+	public getToolSchema(toolName: ToolName): ((args: ToolArgs) => BaseToolSchema | undefined) | undefined {
+		return this.tools.get(toolName)
+	}
+
+	/**
+	 * Generate OpenAI function call schemas for the requested tools.
+	 * Unregistered names and generators returning `undefined` are skipped.
+	 */
+	public generateFunctionCallSchemas(toolNames: ToolName[], toolArgs?: ToolArgs): any[] {
+		return this.buildSchemas(toolNames, toolArgs, (schema) => generateFunctionCallSchema(schema))
+	}
+
+	/**
+	 * Generate Anthropic tool schemas for the requested tools.
+	 * Unregistered names and generators returning `undefined` are skipped.
+	 */
+	public generateAnthropicToolSchemas(toolNames: ToolName[], toolArgs?: ToolArgs): any[] {
+		return this.buildSchemas(toolNames, toolArgs, (schema) => generateAnthropicToolSchema(schema))
+	}
+
+	/**
+	 * Get supported tools from a list of tool names
+	 */
+	public getSupportedTools(toolNames: ToolName[]): ToolName[] {
+		return toolNames.filter((toolName) => this.tools.has(toolName))
+	}
+
+	/**
+	 * Get unsupported tools from a list of tool names
+	 */
+	public getUnsupportedTools(toolNames: ToolName[]): ToolName[] {
+		return toolNames.filter((toolName) => !this.tools.has(toolName))
+	}
+
+	/**
+	 * Shared loop behind the two public schema generators: run each requested
+	 * tool's generator and convert every schema it yields, in `toolNames` order.
+	 *
+	 * NOTE(review): when `toolArgs` is omitted the generators receive an empty
+	 * object, so any generator that calls a method on a missing field (for
+	 * example `args.cwd.toPosix()`) will throw; callers should pass real arguments.
+	 */
+	private buildSchemas(
+		toolNames: ToolName[],
+		toolArgs: ToolArgs | undefined,
+		convert: (schema: BaseToolSchema) => any,
+	): any[] {
+		const schemas: any[] = []
+
+		for (const toolName of toolNames) {
+			// A fresh fallback object per call keeps generators isolated from each other.
+			const schema = this.tools.get(toolName)?.(toolArgs ?? ({} as ToolArgs))
+			if (schema) {
+				schemas.push(convert(schema))
+			}
+		}
+
+		return schemas
+	}
+}
+
+/**
+ * Get the global tool registry instance
+ */
+export function getToolRegistry(): ToolRegistry {
+	return ToolRegistry.getInstance()
+}
diff --git a/src/core/prompts/tools/schemas/update-todo-list-schema.ts b/src/core/prompts/tools/schemas/update-todo-list-schema.ts
new file mode 100644
index 000000000000..d02598bfdcf0
--- /dev/null
+++ b/src/core/prompts/tools/schemas/update-todo-list-schema.ts
@@ -0,0 +1,90 @@
+import { ToolArgs } from "../types"
+import { BaseToolSchema } from "./base-tool-schema"
+
+export function generateUpdateTodoListSchema(args: ToolArgs): BaseToolSchema {
+	const schema: BaseToolSchema = {
+		name: "update_todo_list",
+		description:
+			"Replaces the current TODO list with an updated one. This is used for tracking task progress, allows for updating multiple statuses at once, and supports adding new tasks as they arise.",
+		parameters: [
+			{
+				name: "todos",
+				type: "string",
+				description:
+					"The complete TODO list in Markdown checklist format. Use '[ ]' for pending, '[x]' for completed, and '[-]' for in-progress tasks.",
+				required: true,
+			},
+		],
+		systemPrompt: `## update_todo_list
+
+**Description:**
+Replace the entire TODO list with an updated checklist reflecting the current state. Always provide the full list; the system will overwrite the previous one. This tool is designed for step-by-step task tracking, allowing you to confirm completion of each step before updating, update multiple task statuses at once (e.g., mark one as completed and start the next), and dynamically add new todos discovered during long or complex tasks.
+
+**Checklist Format:**
+- Use a single-level markdown checklist (no nesting or subtasks).
+- List todos in the intended execution order.
+- Status options: + - [ ] Task description (pending) + - [x] Task description (completed) + - [-] Task description (in progress) + +**Status Rules:** +- [ ] = pending (not started) +- [x] = completed (fully finished, no unresolved issues) +- [-] = in_progress (currently being worked on) + +**Core Principles:** +- Before updating, always confirm which todos have been completed since the last update. +- You may update multiple statuses in a single update (e.g., mark the previous as completed and the next as in progress). +- When a new actionable item is discovered during a long or complex task, add it to the todo list immediately. +- Do not remove any unfinished todos unless explicitly instructed. +- Always retain all unfinished tasks, updating their status as needed. +- Only mark a task as completed when it is fully accomplished (no partials, no unresolved dependencies). +- If a task is blocked, keep it as in_progress and add a new todo describing what needs to be resolved. +- Remove tasks only if they are no longer relevant or if the user requests deletion. + +**Usage Example:** + + +[x] Analyze requirements +[x] Design architecture +[-] Implement core logic +[ ] Write tests +[ ] Update documentation + + + +*After completing "Implement core logic" and starting "Write tests":* + + +[x] Analyze requirements +[x] Design architecture +[x] Implement core logic +[-] Write tests +[ ] Update documentation +[ ] Add performance benchmarks + + + +**When to Use:** +- The task is complicated or involves multiple steps or requires ongoing tracking. +- You need to update the status of several todos at once. +- New actionable items are discovered during task execution. +- The user requests a todo list or provides multiple tasks. +- The task is complex and benefits from clear, stepwise progress tracking. + +**When NOT to Use:** +- There is only a single, trivial task. +- The task can be completed in one or two simple steps. +- The request is purely conversational or informational. 
+ +**Task Management Guidelines:** +- Mark task as completed immediately after all work of the current task is done. +- Start the next task by marking it as in_progress. +- Add new todos as soon as they are identified. +- Use clear, descriptive task names. +`, + } + + return schema +} diff --git a/src/core/prompts/tools/schemas/use-mcp-tool-schema.ts b/src/core/prompts/tools/schemas/use-mcp-tool-schema.ts new file mode 100644 index 000000000000..4ed833f89f5e --- /dev/null +++ b/src/core/prompts/tools/schemas/use-mcp-tool-schema.ts @@ -0,0 +1,66 @@ +import { ToolArgs } from "../types" +import { BaseToolSchema } from "./base-tool-schema" + +export function generateUseMcpToolSchema(args: ToolArgs): BaseToolSchema | undefined { + if (!args.mcpHub) { + return undefined + } + + const schema: BaseToolSchema = { + name: "use_mcp_tool", + description: + "Request to use a tool provided by a connected MCP server. Each MCP server can provide multiple tools with different capabilities. Tools have defined input schemas that specify required and optional parameters.", + parameters: [ + { + name: "server_name", + type: "string", + description: "The name of the MCP server providing the tool", + required: true, + }, + { + name: "tool_name", + type: "string", + description: "The name of the tool to execute", + required: true, + }, + { + name: "arguments", + type: "string", + description: "A JSON object containing the tool's input parameters, following the tool's input schema", + required: true, + }, + ], + systemPrompt: `## use_mcp_tool +Description: Request to use a tool provided by a connected MCP server. Each MCP server can provide multiple tools with different capabilities. Tools have defined input schemas that specify required and optional parameters. 
+Parameters: +- server_name: (required) The name of the MCP server providing the tool +- tool_name: (required) The name of the tool to execute +- arguments: (required) A JSON object containing the tool's input parameters, following the tool's input schema +Usage: + +server name here +tool name here + +{ + "param1": "value1", + "param2": "value2" +} + + + +Example: Requesting to use an MCP tool + + +weather-server +get_forecast + +{ + "city": "San Francisco", + "days": 5 +} + +`, + } + + return schema +} diff --git a/src/core/prompts/tools/schemas/write-to-file-schema.ts b/src/core/prompts/tools/schemas/write-to-file-schema.ts new file mode 100644 index 000000000000..d60dc31f7769 --- /dev/null +++ b/src/core/prompts/tools/schemas/write-to-file-schema.ts @@ -0,0 +1,69 @@ +import { ToolArgs } from "../types" +import { BaseToolSchema } from "./base-tool-schema" + +export function generateWriteToFileSchema(args: ToolArgs): BaseToolSchema { + const schema: BaseToolSchema = { + name: "write_to_file", + description: `Request to write content to a file. This tool is primarily used for **creating new files** or for scenarios where a **complete rewrite of an existing file is intentionally required**. If the file exists, it will be overwritten. If it doesn't exist, it will be created. This tool will automatically create any directories needed to write the file.`, + parameters: [ + { + name: "path", + type: "string", + description: `The path of the file to write to (relative to the current workspace directory ${args.cwd})`, + required: true, + }, + { + name: "content", + type: "string", + description: + "Content to write to the file. When performing a full rewrite of an existing file or creating a new one, ALWAYS provide the COMPLETE intended content of the file, without any truncation or omissions. You MUST include ALL parts of the file, even if they haven't been modified. 
Do NOT include the line numbers in the content though, just the actual content of the file.", + required: true, + }, + { + name: "line_count", + type: "number", + description: + "The number of lines in the file. Make sure to compute this based on the actual content of the file, not the number of lines in the content you're providing.", + required: true, + }, + ], + systemPrompt: `## write_to_file +Description: Request to write content to a file. This tool is primarily used for **creating new files** or for scenarios where a **complete rewrite of an existing file is intentionally required**. If the file exists, it will be overwritten. If it doesn't exist, it will be created. This tool will automatically create any directories needed to write the file. +Parameters: +- path: (required) The path of the file to write to (relative to the current workspace directory ${args.cwd}) +- content: (required) The content to write to the file. When performing a full rewrite of an existing file or creating a new one, ALWAYS provide the COMPLETE intended content of the file, without any truncation or omissions. You MUST include ALL parts of the file, even if they haven't been modified. Do NOT include the line numbers in the content though, just the actual content of the file. +- line_count: (required) The number of lines in the file. Make sure to compute this based on the actual content of the file, not the number of lines in the content you're providing. 
+Usage: + +File path here + +Your file content here + +total number of lines in the file, including empty lines + + +Example: Requesting to write to frontend-config.json + +frontend-config.json + +{ + "apiEndpoint": "https://api.example.com", + "theme": { + "primaryColor": "#007bff", + "secondaryColor": "#6c757d", + "fontFamily": "Arial, sans-serif" + }, + "features": { + "darkMode": true, + "notifications": true, + "analytics": false + }, + "version": "1.0.0" +} + +14 +`, + } + + return schema +} diff --git a/src/core/prompts/tools/tool-availability.ts b/src/core/prompts/tools/tool-availability.ts new file mode 100644 index 000000000000..bdf617c68538 --- /dev/null +++ b/src/core/prompts/tools/tool-availability.ts @@ -0,0 +1,106 @@ +import type { ToolName, ModeConfig } from "@roo-code/types" + +import { TOOL_GROUPS, ALWAYS_AVAILABLE_TOOLS } from "../../../shared/tools" +import { McpHub } from "../../../services/mcp/McpHub" +import { Mode, getModeConfig, isToolAllowedForMode, getGroupName } from "../../../shared/modes" +import { CodeIndexManager } from "../../../services/code-index/manager" +import { getToolRegistry } from "./schemas/tool-registry" +import { ToolArgs } from "./types" + +export interface ToolAvailabilityResult { + /** + * All available tools for the current mode and configuration + */ + availableTools: ToolName[] + + /** + * Tools that should use XML descriptions (traditional approach) + */ + xmlTools: ToolName[] + + /** + * Tools that should use native tool calls + */ + toolCallTools: ToolName[] +} + +export interface ToolAvailabilityArgs extends ToolArgs { + mode: Mode + codeIndexManager?: CodeIndexManager + customModes?: ModeConfig[] +} + +export function getToolAvailability(args: ToolAvailabilityArgs): ToolAvailabilityResult { + const { mode, codeIndexManager, customModes, experiments, settings } = args + + const config = getModeConfig(mode, customModes) + const tools = new Set() + + // Add tools from mode's groups + 
config.groups.forEach((groupEntry) => { + const groupName = getGroupName(groupEntry) + const toolGroup = TOOL_GROUPS[groupName] + if (toolGroup) { + toolGroup.tools.forEach((tool) => { + if ( + isToolAllowedForMode( + tool as ToolName, + mode, + customModes ?? [], + undefined, + undefined, + experiments ?? {}, + ) + ) { + tools.add(tool as ToolName) + } + }) + } + }) + + // Add always available tools + ALWAYS_AVAILABLE_TOOLS.forEach((tool) => tools.add(tool)) + + // Conditionally exclude codebase_search if feature is disabled or not configured + if ( + !codeIndexManager || + !(codeIndexManager.isFeatureEnabled && codeIndexManager.isFeatureConfigured && codeIndexManager.isInitialized) + ) { + tools.delete("codebase_search") + } + + // Conditionally exclude update_todo_list if disabled in settings + if (settings?.todoListEnabled === false) { + tools.delete("update_todo_list") + } + + // Conditionally exclude generate_image if experiment is not enabled + if (!experiments?.imageGeneration) { + tools.delete("generate_image") + } + + // Conditionally exclude run_slash_command if experiment is not enabled + if (!experiments?.runSlashCommand) { + tools.delete("run_slash_command") + } + + const availableTools = Array.from(tools) + + // Determine which tools should use tool calls vs XML + let toolCallTools: ToolName[] = [] + let xmlTools: ToolName[] = [...availableTools] + + if (settings?.toolCallEnabled === true) { + const toolRegistry = getToolRegistry() + toolCallTools = toolRegistry.getSupportedTools(availableTools) + + // Remove tool call tools from XML tools list + xmlTools = xmlTools.filter((tool) => !toolCallTools.includes(tool)) + } + + return { + availableTools, + xmlTools, + toolCallTools, + } +} diff --git a/src/core/prompts/types.ts b/src/core/prompts/types.ts index d90b1b821ab4..523581c21aca 100644 --- a/src/core/prompts/types.ts +++ b/src/core/prompts/types.ts @@ -4,6 +4,7 @@ export interface SystemPromptSettings { maxConcurrentFileReads: number 
todoListEnabled: boolean + toolCallEnabled?: boolean useAgentRules: boolean newTaskRequireTodos: boolean } diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index c5be865731ab..e34ea453f834 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -42,6 +42,8 @@ import { CloudService, BridgeOrchestrator } from "@roo-code/cloud" // api import { ApiHandler, ApiHandlerCreateMessageMetadata, buildApiHandler } from "../../api" import { ApiStream, GroundingSource } from "../../api/transform/stream" +import { getGroupName } from "../../shared/modes" +import { getToolAvailability, type ToolAvailabilityArgs } from "../prompts/tools/tool-availability" import { maybeRemoveImageBlocks } from "../../api/transform/image-cleaning" // shared @@ -52,8 +54,8 @@ import { t } from "../../i18n" import { ClineApiReqCancelReason, ClineApiReqInfo } from "../../shared/ExtensionMessage" import { getApiMetrics, hasTokenUsageChanged } from "../../shared/getApiMetrics" import { ClineAskResponse } from "../../shared/WebviewMessage" -import { defaultModeSlug } from "../../shared/modes" -import { DiffStrategy } from "../../shared/tools" +import { defaultModeSlug, modes, getModeBySlug } from "../../shared/modes" +import { DiffStrategy, supportToolCall } from "../../shared/tools" import { EXPERIMENT_IDS, experiments } from "../../shared/experiments" import { getModelMaxOutputTokens } from "../../shared/api" @@ -63,6 +65,7 @@ import { BrowserSession } from "../../services/browser/BrowserSession" import { McpHub } from "../../services/mcp/McpHub" import { McpServerManager } from "../../services/mcp/McpServerManager" import { RepoPerTaskCheckpointService } from "../../services/checkpoints" +import { CodeIndexManager } from "../../services/code-index/manager" // integrations import { DiffViewProvider } from "../../integrations/editor/DiffViewProvider" @@ -114,6 +117,8 @@ import { Gpt5Metadata, ClineMessageWithMetadata } from "./types" import { MessageQueueService } from 
"../message-queue/MessageQueueService" import { AutoApprovalHandler } from "./AutoApprovalHandler" +import { StreamingToolCallProcessor, ToolCallParam, handleOpenaiToolCallStreaming } from "./tool-call-helper" +import { ToolArgs } from "../prompts/tools/types" const MAX_EXPONENTIAL_BACKOFF_SECONDS = 600 // 10 minutes const DEFAULT_USAGE_COLLECTION_TIMEOUT_MS = 5000 // 5 seconds @@ -264,6 +269,9 @@ export class Task extends EventEmitter implements TaskLike { consecutiveMistakeCountForApplyDiff: Map = new Map() toolUsage: ToolUsage = {} + // Streaming Tool Call Processing + streamingToolCallProcessor: StreamingToolCallProcessor = new StreamingToolCallProcessor() + // Checkpoints enableCheckpoints: boolean checkpointService?: RepoPerTaskCheckpointService @@ -284,7 +292,7 @@ export class Task extends EventEmitter implements TaskLike { assistantMessageContent: AssistantMessageContent[] = [] presentAssistantMessageLocked = false presentAssistantMessageHasPendingUpdates = false - userMessageContent: (Anthropic.TextBlockParam | Anthropic.ImageBlockParam)[] = [] + userMessageContent: (Anthropic.TextBlockParam | Anthropic.ImageBlockParam | Anthropic.ToolResultBlockParam)[] = [] userMessageContentReady = false didRejectTool = false didAlreadyUseTool = false @@ -782,7 +790,14 @@ export class Task extends EventEmitter implements TaskLike { this.askResponseImages = undefined askTs = Date.now() this.lastMessageTs = askTs - await this.addToClineMessages({ ts: askTs, type: "ask", ask: type, text, isProtected }) + await this.addToClineMessages({ + ts: askTs, + type: "ask", + ask: type, + text, + isProtected, + progressStatus, + }) } } } else { @@ -792,7 +807,7 @@ export class Task extends EventEmitter implements TaskLike { this.askResponseImages = undefined askTs = Date.now() this.lastMessageTs = askTs - await this.addToClineMessages({ ts: askTs, type: "ask", ask: type, text, isProtected }) + await this.addToClineMessages({ ts: askTs, type: "ask", ask: type, text, isProtected, 
progressStatus }) } // The state is mutable if the message is complete and the task will @@ -1170,7 +1185,9 @@ export class Task extends EventEmitter implements TaskLike { relPath ? ` for '${relPath.toPosix()}'` : "" } without value for required parameter '${paramName}'. Retrying...`, ) - return formatResponse.toolError(formatResponse.missingToolParameterError(paramName)) + return formatResponse.toolError( + formatResponse.missingToolParameterError(paramName, this.apiConfiguration?.toolCallEnabled === true), + ) } // Lifecycle @@ -1311,41 +1328,41 @@ export class Task extends EventEmitter implements TaskLike { // Make sure that the api conversation history can be resumed by the API, // even if it goes out of sync with cline messages. let existingApiConversationHistory: ApiMessage[] = await this.getSavedApiConversationHistory() - - // v2.0 xml tags refactor caveat: since we don't use tools anymore, we need to replace all tool use blocks with a text block since the API disallows conversations with tool uses and no tool schema - const conversationWithoutToolBlocks = existingApiConversationHistory.map((message) => { - if (Array.isArray(message.content)) { - const newContent = message.content.map((block) => { - if (block.type === "tool_use") { - // It's important we convert to the new tool schema - // format so the model doesn't get confused about how to - // invoke tools. - const inputAsXml = Object.entries(block.input as Record) - .map(([key, value]) => `<${key}>\n${value}\n`) - .join("\n") - return { - type: "text", - text: `<${block.name}>\n${inputAsXml}\n`, - } as Anthropic.Messages.TextBlockParam - } else if (block.type === "tool_result") { - // Convert block.content to text block array, removing images - const contentAsTextBlocks = Array.isArray(block.content) - ? 
block.content.filter((item) => item.type === "text") - : [{ type: "text", text: block.content }] - const textContent = contentAsTextBlocks.map((item) => item.text).join("\n\n") - const toolName = findToolName(block.tool_use_id, existingApiConversationHistory) - return { - type: "text", - text: `[${toolName} Result]\n\n${textContent}`, - } as Anthropic.Messages.TextBlockParam - } - return block - }) - return { ...message, content: newContent } - } - return message - }) - existingApiConversationHistory = conversationWithoutToolBlocks + if (this.apiConfiguration.toolCallEnabled !== true) { + const conversationWithoutToolBlocks = existingApiConversationHistory.map((message) => { + if (Array.isArray(message.content)) { + const newContent = message.content.map((block) => { + if (block.type === "tool_use") { + // It's important we convert to the new tool schema + // format so the model doesn't get confused about how to + // invoke tools. + const inputAsXml = Object.entries(block.input as Record) + .map(([key, value]) => `<${key}>\n${value}\n`) + .join("\n") + return { + type: "text", + text: `<${block.name}>\n${inputAsXml}\n`, + } as Anthropic.Messages.TextBlockParam + } else if (block.type === "tool_result") { + // Convert block.content to text block array, removing images + const contentAsTextBlocks = Array.isArray(block.content) + ? 
block.content.filter((item) => item.type === "text") + : [{ type: "text", text: block.content }] + const textContent = contentAsTextBlocks.map((item) => item.text).join("\n\n") + const toolName = findToolName(block.tool_use_id, existingApiConversationHistory) + return { + type: "text", + text: `[${toolName} Result]\n\n${textContent}`, + } as Anthropic.Messages.TextBlockParam + } + return block + }) + return { ...message, content: newContent } + } + return message + }) + existingApiConversationHistory = conversationWithoutToolBlocks + } // FIXME: remove tool use blocks altogether @@ -1688,7 +1705,9 @@ export class Task extends EventEmitter implements TaskLike { // the user hits max requests and denies resetting the count. break } else { - nextUserContent = [{ type: "text", text: formatResponse.noToolsUsed() }] + nextUserContent = [ + { type: "text", text: formatResponse.noToolsUsed(this.apiConfiguration?.toolCallEnabled ?? false) }, + ] this.consecutiveMistakeCount++ } } @@ -1919,6 +1938,9 @@ export class Task extends EventEmitter implements TaskLike { await this.diffViewProvider.reset() + // Reset streaming tool call processor + this.streamingToolCallProcessor.reset() + // Yields only if the first chunk is successful, otherwise will // allow the user to retry the request (most likely due to rate // limit error, which gets thrown on the first chunk). @@ -1959,12 +1981,29 @@ export class Task extends EventEmitter implements TaskLike { pendingGroundingSources.push(...chunk.sources) } break - case "text": { - assistantMessage += chunk.text + case "text": + case "tool_call": { + let chunkContent + let toolParam: ToolCallParam | undefined + if (chunk.type == "tool_call") { + toolParam = + handleOpenaiToolCallStreaming( + this.streamingToolCallProcessor, + chunk.toolCalls, + chunk.toolCallType, + ) ?? 
"" + chunkContent = toolParam.chunkContent + } else { + chunkContent = chunk.text + } + assistantMessage += chunkContent // Parse raw assistant message chunk into content blocks. const prevLength = this.assistantMessageContent.length - this.assistantMessageContent = this.assistantMessageParser.processChunk(chunk.text) + this.assistantMessageContent = this.assistantMessageParser.processChunk( + chunkContent, + toolParam, + ) if (this.assistantMessageContent.length > prevLength) { // New content we need to present, reset to @@ -2185,7 +2224,7 @@ export class Task extends EventEmitter implements TaskLike { const streamingFailedMessage = this.abort ? undefined : (error.message ?? JSON.stringify(serializeError(error), null, 2)) - + console.log(error) // Now call abortTask after determining the cancel reason. await this.abortTask() await abortStream(cancelReason, streamingFailedMessage) @@ -2225,6 +2264,7 @@ export class Task extends EventEmitter implements TaskLike { // Now that the stream is complete, finalize any remaining partial content blocks this.assistantMessageParser.finalizeContentBlocks() this.assistantMessageContent = this.assistantMessageParser.getContentBlocks() + this.streamingToolCallProcessor.reset() if (partialBlocks.length > 0) { // If there is content to update then it will complete and @@ -2261,11 +2301,39 @@ export class Task extends EventEmitter implements TaskLike { isNonInteractive: true, }) } - - await this.addToApiConversationHistory({ - role: "assistant", - content: [{ type: "text", text: assistantMessage }], - }) + if (this.apiConfiguration.toolCallEnabled !== true) { + await this.addToApiConversationHistory({ + role: "assistant", + content: [{ type: "text", text: assistantMessage }], + }) + } else { + let addToolIds: Set = new Set() + const newAssistantMessageContent: Array = [] + for (const block of this.assistantMessageContent) { + if (block.type === "text" && block.content) { + newAssistantMessageContent.push({ type: "text", text: 
block.content }) + } + if (block.type === "tool_use" && block.toolUseId && block.toolUseParam) { + // ignore same tool id + if (addToolIds.has(block.toolUseId)) { + continue + } + newAssistantMessageContent.push({ + type: "tool_use", + id: block.toolUseId, + name: block.name, + input: block.toolUseParam.input, + }) + addToolIds.add(block.toolUseId) + } + } + if (newAssistantMessageContent.length > 0) { + await this.addToApiConversationHistory({ + role: "assistant", + content: newAssistantMessageContent, + }) + } + } TelemetryService.instance.captureConversationMessage(this.taskId, "assistant") @@ -2292,7 +2360,10 @@ export class Task extends EventEmitter implements TaskLike { const didToolUse = this.assistantMessageContent.some((block) => block.type === "tool_use") if (!didToolUse) { - this.userMessageContent.push({ type: "text", text: formatResponse.noToolsUsed() }) + this.userMessageContent.push({ + type: "text", + text: formatResponse.noToolsUsed(this.apiConfiguration?.toolCallEnabled ?? false), + }) this.consecutiveMistakeCount++ } @@ -2408,6 +2479,8 @@ export class Task extends EventEmitter implements TaskLike { { maxConcurrentFileReads: maxConcurrentFileReads ?? 5, todoListEnabled: apiConfiguration?.todoListEnabled ?? true, + toolCallEnabled: + (apiConfiguration?.toolCallEnabled ?? false) && supportToolCall(apiConfiguration?.apiProvider), useAgentRules: vscode.workspace.getConfiguration("roo-cline").get("useAgentRules") ?? 
true, newTaskRequireTodos: vscode.workspace .getConfiguration("roo-cline") @@ -2651,6 +2724,95 @@ export class Task extends EventEmitter implements TaskLike { // non-fatal } + // Generate tool schemas if toolCallEnabled is true + let tools: ToolName[] | undefined = undefined + let toolArgs: ToolArgs | undefined + const apiProvider = this.apiConfiguration.apiProvider + if (this.apiConfiguration.toolCallEnabled === true && supportToolCall(apiProvider)) { + const provider = this.providerRef.deref() + + if (provider) { + const { + customModes, + mcpEnabled, + diffEnabled, + browserViewportSize, + experiments, + enableMcpServerCreation, + maxConcurrentFileReads, + maxReadFileLine, + browserToolEnabled, + } = state ?? {} + // Determine if browser tools can be used based on model support, mode, and user settings + let modelSupportsComputerUse = false + + // Create a temporary API handler to check if the model supports computer use + // This avoids relying on an active Cline instance which might not exist during preview + try { + const tempApiHandler = buildApiHandler(apiConfiguration!) + modelSupportsComputerUse = tempApiHandler.getModel().info.supportsComputerUse ?? false + } catch (error) { + console.error("Error checking if model supports computer use:", error) + } + + const modeConfig = getModeBySlug(mode!, customModes) || modes.find((m) => m.slug === mode) || modes[0] + + const modeSupportsBrowser = + modeConfig?.groups.some((group) => getGroupName(group) === "browser") ?? false + + // Only enable browser tools if the model supports it, the mode includes browser tools, + // and browser tools are enabled in settings + const canUseBrowserTool = + modelSupportsComputerUse && modeSupportsBrowser && (browserToolEnabled ?? true) + + let mcpHub: McpHub | undefined + if (mcpEnabled ?? 
true) { + // Wait for MCP hub initialization through McpServerManager + mcpHub = await McpServerManager.getInstance(provider.context, provider) + + if (!mcpHub) { + throw new Error("Failed to get MCP hub from server manager") + } + + // Wait for MCP servers to be connected before generating system prompt + await pWaitFor(() => !mcpHub!.isConnecting, { timeout: 10_000 }).catch(() => { + console.error("MCP servers failed to connect in time") + }) + } + const hasMcpGroup = modeConfig.groups.some((groupEntry) => getGroupName(groupEntry) === "mcp") + const hasMcpServers = mcpHub && mcpHub.getServers().length > 0 + const shouldIncludeMcp = hasMcpGroup && hasMcpServers + // Use the unified tool availability method + const codeIndexManager = CodeIndexManager.getInstance(provider.context, this.cwd) + const toolAvailabilityArgs: ToolAvailabilityArgs = { + mode: mode!, + cwd: this.cwd, + supportsComputerUse: canUseBrowserTool, + codeIndexManager, + diffStrategy: diffEnabled ? this.diffStrategy : undefined, + browserViewportSize, + mcpHub: shouldIncludeMcp ? provider.getMcpHub() : undefined, + customModes, + experiments, + partialReadsEnabled: maxReadFileLine !== -1, + settings: { + maxConcurrentFileReads: maxConcurrentFileReads ?? 5, + todoListEnabled: apiConfiguration?.todoListEnabled ?? true, + toolCallEnabled: + (apiConfiguration?.toolCallEnabled ?? false) && + supportToolCall(apiConfiguration?.apiProvider), + useAgentRules: + vscode.workspace.getConfiguration("roo-cline").get("useAgentRules") ?? true, + enableMcpServerCreation, + }, + } + + const { toolCallTools } = getToolAvailability(toolAvailabilityArgs) + tools = toolCallTools + toolArgs = toolAvailabilityArgs + } + } + const metadata: ApiHandlerCreateMessageMetadata = { mode: mode, taskId: this.taskId, @@ -2658,6 +2820,8 @@ export class Task extends EventEmitter implements TaskLike { ...(previousResponseId && !this.skipPrevResponseIdOnce ? 
{ previousResponseId } : {}), // If a condense just occurred, explicitly suppress continuity fallback for the next call ...(this.skipPrevResponseIdOnce ? { suppressPreviousResponseId: true } : {}), + tools: tools, + toolArgs: toolArgs, } // Reset skip flag after applying (it only affects the immediate next call) diff --git a/src/core/task/__tests__/tool-call-helper.spec.ts b/src/core/task/__tests__/tool-call-helper.spec.ts new file mode 100644 index 000000000000..44e34b9b1ba9 --- /dev/null +++ b/src/core/task/__tests__/tool-call-helper.spec.ts @@ -0,0 +1,447 @@ +// @vitest-environment node + +/** + * @fileoverview + * StreamingToolCallProcessor & handleOpenaiToolCallStreaming 单元测试 + */ + +import { describe, it, expect, beforeEach } from "vitest" +import { StreamingToolCallProcessor, handleOpenaiToolCallStreaming } from "../tool-call-helper" + +describe("StreamingToolCallProcessor", () => { + let processor: StreamingToolCallProcessor + + beforeEach(() => { + processor = new StreamingToolCallProcessor() + }) + + it("should process a simple function call with string arguments", () => { + const chunk = [{ index: 0, id: "1", function: { name: "read_file", arguments: '{"msg":"hello"}' } }] + const xml = processor.processChunk(chunk) + expect(xml).toContain("") + expect(xml).toContain("hello") + }) + + it("should handle incremental argument streaming", () => { + const chunk1 = [{ index: 0, id: "1", function: { name: "write_to_file", arguments: '{"a":' } }] + const chunk2 = [{ index: 0, id: "1", function: { name: "", arguments: '1,"b":2}' } }] + let xml = processor.processChunk(chunk1) + expect(xml).toContain("") + expect(xml).not.toContain("1") + xml += processor.processChunk(chunk2) + expect(xml).toContain("1") + expect(xml).toContain("2") + expect(xml).toContain("") + }) + + it("should finalize incomplete tool calls", () => { + const chunk = [{ index: 0, id: "1", function: { name: "search_files", arguments: '{"foo":"bar"' } }] + let finalXml = 
processor.processChunk(chunk) + finalXml += processor.finalize() + expect(finalXml).toContain("bar") + expect(finalXml).toContain("") + }) + + it("should reset state", () => { + const chunk = [{ index: 0, id: "1", function: { name: "list_files", arguments: '{"x":1}' } }] + processor.processChunk(chunk) + processor.reset() + const xml = processor.processChunk(chunk) + expect(xml).toContain("") + expect(xml).toContain("1") + }) + + it("should handle multiple tool calls (multi-index)", () => { + const chunk = [ + { index: 0, id: "1", function: { name: "execute_command", arguments: '{"a":1}' } }, + { index: 1, id: "2", function: { name: "browser_action", arguments: '{"b":2}' } }, + ] + const xml = processor.processChunk(chunk) + expect(xml).toContain("") + expect(xml).toContain("1") + expect(xml).toContain("") + expect(xml).toContain("2") + }) + + it("should handle array and nested objects", () => { + const chunk = [ + { index: 0, id: "1", function: { name: "use_mcp_tool", arguments: '{"arr":[1,2],"obj":{"k":"v"}}' } }, + ] + const xml = processor.processChunk(chunk) + expect(xml).toContain("") + expect(xml).toContain("") + expect(xml).toContain("v") + }) + it("should handle deeply nested and mixed arrays/objects", () => { + const chunk = [ + { + index: 0, + id: "1", + function: { + name: "access_mcp_resource", + arguments: '{"level1":{"level2":{"arr":[{"x":1},{"y":[2,3,{"z":"end"}]}],"val":42},"emptyArr":[]}}', + }, + }, + ] + const xml = processor.processChunk(chunk) + expect(xml).toContain("") + expect(xml).toContain("") + expect(xml).toContain("") + expect(xml).toContain("1") + expect(xml).toContain("") + expect(xml).toContain("end") + expect(xml).toContain("42") + expect(xml).toContain("") + }) + + it("should handle incomplete deeply nested JSON streamed in multiple chunks", () => { + const chunk1 = [ + { + index: 0, + id: "1", + function: { + name: "ask_followup_question", + arguments: '{"foo":{"bar":[{"baz":1},', + }, + }, + ] + const chunk2 = [ + { + index: 0, 
+ id: "1", + function: { + name: "", + arguments: '{"baz":2},{"baz":3}]}, "tail":', + }, + }, + ] + const chunk3 = [ + { + index: 0, + id: "1", + function: { + name: "", + arguments: '"done"', + }, + }, + ] + let xml = processor.processChunk(chunk1) + expect(xml).toContain("") + expect(xml).toContain("") + expect(xml).toContain("") + expect(xml).toContain("1") + expect(xml).not.toContain("2") + xml += processor.processChunk(chunk2) + expect(xml).toContain("2") + expect(xml).toContain("3") + xml += processor.processChunk(chunk3) + expect(xml).toContain("done") + expect(xml).not.toContain("") + xml += processor.finalize() + expect(xml).toContain("") + }) + + it("should handle invalid JSON gracefully", () => { + const chunk = [{ index: 0, id: "1", function: { name: "attempt_completion", arguments: '{"a":' } }] + expect(() => processor.processChunk(chunk)).not.toThrow() + expect(() => processor.finalize()).not.toThrow() + }) + + it("should process read_file complete arguments", () => { + const chunk = [ + { + index: 0, + id: "1", + function: { + name: "read_file", + arguments: '{"args":{"file":[{"path":"abc/a/b/a.js"},{"path":"abc/c.js"}]}}', + }, + }, + ] + const xml = processor.processChunk(chunk) + expect(xml.trim()).toBe(` + + + abc/a/b/a.js + + + abc/c.js + + +`) + }) + + it("should handle read_file tool calls", () => { + let xml = "" + xml += processor.processChunk([ + { + index: 0, + id: "call_0_e4d7cf16-74e9-423a-bde5-47bb309978d5", + type: "function", + function: { name: "read_file", arguments: "" }, + }, + ]) + xml += processor.processChunk([{ index: 0, function: { arguments: '{"' } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "args" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: '":{"' } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "file" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: '":[' } }]) + xml += processor.processChunk([{ index: 0, function: { 
arguments: '{"' } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "path" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: '":"' } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "abc" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "/a" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "/b" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "/a" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: ".js" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: '"},' } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: '{"' } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "path" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: '":"' } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "abc" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "/c" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: ".js" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: '"' } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "}]" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "}}" } }]) + expect(xml.trim()).toBe(` + + + abc/a/b/a.js + + + abc/c.js + + +`) + }) + + it("should handle write_to_file tool calls", () => { + let xml = "" + xml += processor.processChunk([ + { + index: 0, + id: "call_0_37f0c076-2c5f-4af0-b16b-cf6c0d7479f3", + type: "function", + function: { name: "write_to_file", arguments: "" }, + }, + ]) + + xml += processor.processChunk([{ index: 0, function: { arguments: '{"' } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "path" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: '":"' } }]) + xml += processor.processChunk([{ index: 0, 
function: { arguments: "abc" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "/a" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "/b" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "/a" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: ".js" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: '","' } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "content" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: '":"' } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "//" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: " Function" } }]) + expect(xml).toContain(" Function") + xml += processor.processChunk([{ index: 0, function: { arguments: " to" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: " add" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: " two" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: " numbers" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "\\n" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "function" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: " add" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "Numbers" } }]) + expect(xml).toContain(" addNumbers") + xml += processor.processChunk([{ index: 0, function: { arguments: "(a" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "," } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: " b" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: ")" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: " {\\" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "n" } }]) + xml += 
processor.processChunk([{ index: 0, function: { arguments: " " } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: " return" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: " a" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: " +" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: " b" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: ";\\" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "n" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "}\\" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "n" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "\\n" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "//" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: " Example" } }]) + expect(xml).toContain(" Example") + xml += processor.processChunk([{ index: 0, function: { arguments: " usage" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "\\n" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "const" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: " result" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: " =" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: " add" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "Numbers" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "(" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "5" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "," } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: " " } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "7" } }]) + xml += processor.processChunk([{ index: 0, 
function: { arguments: ");" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "\\" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "n" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "console" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: ".log" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "(result" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: ");" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: " //" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: " Output" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: ":" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: " " } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "12" } }]) + expect(xml.endsWith("Output: 12")).toBe(true) + xml += processor.processChunk([{ index: 0, function: { arguments: '","' } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "line" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "_count" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: '":' } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "6" } }]) + xml += processor.processChunk([{ index: 0, function: { arguments: "}" } }]) + expect(xml.trim().endsWith("")).toBe(true) + }) +}) + +describe("handleOpenaiToolCallStreaming", () => { + it("should delegate to processor.processChunk", () => { + const processor = new StreamingToolCallProcessor() + const chunk = [{ index: 0, id: "1", function: { name: "read_file", arguments: '{"msg":"hi"}' } }] + const xml = handleOpenaiToolCallStreaming(processor, chunk, "openai").chunkContent + expect(xml).toContain("") + expect(xml).toContain("hi") + }) + + it("should delegate to apply_diff processor.processChunk", () => { + const processor 
= new StreamingToolCallProcessor() + const chunk = [ + { + index: 0, + id: "1", + function: { + name: "apply_diff", + arguments: + '{"args":{"file":[{"diff":[{"replace":"catch (Exception e) {if (true) {}throw e;}","search":"catch (Exception e) {throw e;}","start_line":252}],"path":"Test.java"},{"replace":"catch (Exception e) {if (true) {}throw e;}","search":"catch (Exception e) {throw e;}","start_line":252}],"path":"Test.java"}]}}', + }, + }, + ] + const xml = handleOpenaiToolCallStreaming(processor, chunk, "openai").chunkContent + expect(xml).toContain(` + +\t +\t\t +\t\t\tcatch (Exception e) {if (true) {}throw e;} +\t\t\tcatch (Exception e) {throw e;} +\t\t\t252 +\t\t +\t\tTest.java +\t +\t +\t\tcatch (Exception e) {if (true) {}throw e;} +\t\tcatch (Exception e) {throw e;} +\t\t252 +\t +\tTest.java + +`) + }) + + it("should delegate to apply_diff2 processor.processChunk ", () => { + const processor = new StreamingToolCallProcessor() + const chunk1 = [ + { + index: 0, + id: "1", + function: { + name: "apply_diff", + arguments: '{"args":{"file":[{"diff":[{"replace":"catch (Exception e) {if (1==1) {}throw e;}",', + }, + }, + ] + const chunk2 = [ + { + index: 0, + id: "", + function: { + name: "", + arguments: '"search":"catch (Exception e) {throw e;}","start_line":25', + }, + }, + ] + const chunk3 = [ + { + index: 0, + id: "", + function: { + name: "", + arguments: '2}],"path":"Test.java"}]}}', + }, + }, + ] + let xml = handleOpenaiToolCallStreaming(processor, chunk1, "openai").chunkContent + expect(xml).not.toContain("") + expect(xml).not.toContain("true") + xml += handleOpenaiToolCallStreaming(processor, chunk2, "openai").chunkContent + expect(xml).toContain("") + xml += handleOpenaiToolCallStreaming(processor, chunk3, "openai").chunkContent + expect(xml).toContain("252") + expect(xml).toContain(` + +\t +\t\t +\t\t\tcatch (Exception e) {if (1==1) {}throw e;} +\t\t\tcatch (Exception e) {throw e;} +\t\t\t252 +\t\t +\t\tTest.java +\t + +`) + }) + + it("should test 
read_file multiple file", () => { + const processor = new StreamingToolCallProcessor() + const input = `{"args": {"file": [{"path": "pom.xml", "line_range":["1-40","80-120"]}, {"path": "build.gradle"}, {"path": "gradle.properties"}]}}` + const chunk = [ + { + index: 0, + id: "1", + function: { + name: "", + arguments: input, + }, + }, + ] + let xml = handleOpenaiToolCallStreaming(processor, chunk, "openai").chunkContent + const chunk2 = [ + { + index: 0, + id: "1", + function: { + name: "read_file", + arguments: "", + }, + }, + ] + xml = handleOpenaiToolCallStreaming(processor, chunk2, "openai").chunkContent + expect(xml).toContain("") + expect(xml).toContain(`\t +\t\tpom.xml +\t\t1-40 +\t\t80-120 +\t +\t +\t\tbuild.gradle +\t`) + expect(xml).toContain("") + }) + + it("should handle invalid tool names by rejecting them", () => { + const processor = new StreamingToolCallProcessor() + const chunk = [{ index: 0, id: "1", function: { name: "invalid_tool", arguments: '{"msg":"hello"}' } }] + const xml = handleOpenaiToolCallStreaming(processor, chunk, "openai").chunkContent + expect(xml).toBe("") // Should produce no output for invalid tools + expect(handleOpenaiToolCallStreaming(processor, chunk, "openai").toolName).toBe("") // Tool name should be empty + }) +}) diff --git a/src/core/task/tool-call-helper.ts b/src/core/task/tool-call-helper.ts new file mode 100644 index 000000000000..7d57820b8d0c --- /dev/null +++ b/src/core/task/tool-call-helper.ts @@ -0,0 +1,722 @@ +/** + * @fileoverview + * This file contains the implementation of a streaming JSON to XML converter + * for handling tool calls from AI models. It uses a state machine and stacks + * to process incoming JSON chunks incrementally and generate corresponding XML representations. 
+ */ + +import Anthropic from "@anthropic-ai/sdk" +import { ToolCallProviderType } from "../../shared/tools" +import { getToolRegistry } from "../prompts/tools/schemas/tool-registry" +import { type ToolName } from "@roo-code/types" + +/** + * Defines the possible states of the JSON parser. + */ +enum ParserState { + EXPECT_ROOT, // Expecting root object or array + EXPECT_VALUE, + EXPECT_KEY, + EXPECT_COLON, + EXPECT_COMMA_OR_CLOSING, +} + +export interface ToolCallParam { + providerType: ToolCallProviderType + toolName: string + toolUserId: string + chunkContent: string + anthropicContent?: Anthropic.ToolUseBlockParam + originContent: any[] +} + +/** + * Represents the processing state for a single tool call. + * It tracks the parsing progress, state, and structural information. + */ +class ToolCallProcessingState { + functionNameOutputted = false + functionClosed = false + + // The full arguments string accumulated so far. + arguments = "" + // The index of the next character to process in the arguments string. + cursor = 0 + + // The current state of the parser FSM (Finite State Machine). + parserState = ParserState.EXPECT_ROOT + + // Flags for handling string parsing. + inString = false + isEscaped = false + isStreamingStringValue = false + + // Stack to keep track of JSON objects ({) and arrays ([). + bracketStack: ("{" | "[")[] = [] + // Stack to keep track of XML tags for generating closing tags correctly. + xmlTagStack: string[] = [] + // Buffer for the current string literal (key or value) being parsed. + currentString = "" + // Buffer for accumulating primitive values across chunks + primitiveBuffer = "" + // Flag to track if we're at the start of an array to prevent duplicate tags. + justOpenedArray = false +} + +/** + * A streaming processor that converts tool call JSON chunks into XML format in real-time. 
+ */ +export class StreamingToolCallProcessor { + private accumulatedToolCalls: any[] = [] + private processingStates: Map = new Map() + + /** + * Processes a new chunk of tool call data and returns the resulting XML segment. + * @param chunk - The tool call chunk, typically from a streaming API. + * @returns A string containing the newly generated XML. + */ + public processChunk(chunk: any, providerType: ToolCallProviderType = "openai"): string { + switch (providerType) { + case "openai": + return this.processChunkOpenAIFormat(chunk).chunkContent + default: + throw new Error(`Unsupported provider type: ${providerType}`) + } + } + + /** + * Processes a new chunk of tool call data and returns the resulting XML segment. + * @param chunk - The tool call chunk, typically from a streaming API. + * @returns A string containing the newly generated XML. + */ + public processChunkTool(chunk: any, providerType: ToolCallProviderType = "openai"): ToolCallParam { + switch (providerType) { + case "openai": + return this.processChunkOpenAIFormat(chunk) + default: + throw new Error(`Unsupported provider type: ${providerType}`) + } + } + + /** + * Processes a new chunk of tool call data for the OpenAI provider. + * @param chunk - The tool call chunk to process. + * @returns A string containing the resulting XML segment. + */ + private processChunkOpenAIFormat(chunk: any): ToolCallParam { + let xmlOutput = "" + let index = 0 + // Check if the tool name is valid using the tool registry + const toolRegistry = getToolRegistry() + for (const delta of chunk) { + index = delta.index || 0 + + // Initialize state for a new tool call. + if (!this.accumulatedToolCalls[index]) { + this.accumulatedToolCalls[index] = { + id: delta.id || "", + type: "function", + function: { name: "", arguments: "" }, + } + this.processingStates.set(index, new ToolCallProcessingState()) + } + + const toolCall = this.accumulatedToolCalls[index] + const state = this.processingStates.get(index)! 
+ + // Accumulate function name and arguments. + if (delta.function?.name) { + toolCall.function.name += delta.function.name + } + if (delta.function?.arguments) { + toolCall.function.arguments += delta.function.arguments + } + + const isValidToolName = + toolCall.function.name && toolRegistry.isToolSupported(toolCall.function.name as ToolName) + + // Output the opening function tag once the name is known and valid. + if (isValidToolName && !state.functionNameOutputted) { + xmlOutput += `<${toolCall.function.name}>` + state.functionNameOutputted = true + // When we first output the function name, also process any accumulated arguments + if (toolCall.function.arguments.length > 0) { + state.arguments = toolCall.function.arguments + xmlOutput += this.processArguments(state, toolCall.function.name) + } + } else if (state.functionNameOutputted && toolCall.function.arguments.length > state.arguments.length) { + // Process new arguments chunk only if we already have a valid function name + state.arguments = toolCall.function.arguments + xmlOutput += this.processArguments(state, toolCall.function.name) + } + + // Check if the JSON is complete and close the function tag. + if (isValidToolName && !state.functionClosed && state.bracketStack.length === 0 && state.cursor > 0) { + // A simple check to see if we've reached a terminal state. + // A more robust check might be necessary for edge cases. + const remaining = state.arguments.substring(state.cursor).trim() + if (remaining === "") { + xmlOutput += `\n\n` + state.functionClosed = true + } + } + } + // the index of GPT-5 tool_call not start by 0 + const toolCall = this.accumulatedToolCalls[index] + const isValidToolName = + toolCall?.function?.name && toolRegistry.isToolSupported(toolCall.function.name as ToolName) + + const result: ToolCallParam = { + providerType: "openai", + toolName: isValidToolName ? 
toolCall.function.name : "", + toolUserId: toolCall?.id || undefined, + chunkContent: xmlOutput, + originContent: this.accumulatedToolCalls, + } + + // Provide a temporary anthropicContent (input) during streaming before final closure + const currentState = this.processingStates.get(index) + if (currentState && !currentState.functionClosed && isValidToolName) { + const tmpInput = this.tryBuildTemporaryJson(currentState, toolCall.function.arguments) + if (tmpInput != null) { + result.anthropicContent = { + id: result.toolUserId, + name: result.toolName, + input: tmpInput, + type: "tool_use", + } + } + } + + if (this.processingStates.get(index)?.functionClosed && isValidToolName) { + let input + try { + input = JSON.parse(toolCall.function.arguments) + } catch (e) { + input = "" + } + result.anthropicContent = { + id: result.toolUserId, + name: result.toolName, + input: input, + type: "tool_use", + } + } + return result + } + + /** + * Finalizes the XML output, closing any remaining open tags. + * @returns a string with the closing XML tags. + */ + public finalize(): string { + let finalXml = "" + const toolRegistry = getToolRegistry() + + for (let i = 0; i < this.accumulatedToolCalls.length; i++) { + const state = this.processingStates.get(i) + const toolCall = this.accumulatedToolCalls[i] + + if (!state || !toolCall || state.functionClosed) { + continue + } + + // Check if the tool name is valid + const isValidToolName = + toolCall.function.name && toolRegistry.isToolSupported(toolCall.function.name as ToolName) + + if (!isValidToolName) { + continue + } + + // Process any remaining buffered arguments + if (toolCall.function.arguments.length > state.arguments.length) { + state.arguments = toolCall.function.arguments + finalXml += this.processArguments(state, toolCall.function.name) + } + + // Close remaining tags from the stack in reverse order. + while (state.xmlTagStack.length > 0) { + const tag = state.xmlTagStack.pop()! 
+ const xmlLevel = Math.max(0, state.bracketStack.filter((b) => b === "{").length - 1) + finalXml += `${this.getIndent(xmlLevel)}${this.onCloseTag(tag, toolCall.function.name)}` + } + + if (state.functionNameOutputted) { + finalXml += `\n` + } + } + return finalXml + } + + /** + * Resets the processor to its initial state for a new sequence of tool calls. + */ + public reset(): void { + this.accumulatedToolCalls = [] + this.processingStates.clear() + } + + /** + * Generates indentation for pretty-printing the XML output. + * @param level - The desired indentation level. + * @returns A string of tabs. + */ + private getIndent(level: number): string { + if (level >= 0) { + return "\t".repeat(level) + } + return "" + } + + /** + * The core state machine for parsing JSON arguments and generating XML. + * @param state - The current processing state for a tool call. + * @param toolName - The name of the current tool being processed. + * @returns The generated XML string for the processed chunk. + */ + private processArguments(state: ToolCallProcessingState, toolName: string): string { + let xml = "" + const args = state.arguments + + while (state.cursor < args.length) { + const char = args[state.cursor] + + if (state.inString) { + if (state.isStreamingStringValue) { + // --- Streaming Logic for String Values (character by character) --- + if (char === "\\") { + // Handle escape sequence. + const escapeSequence = this.getFullEscapeSequence(args, state.cursor) + if (escapeSequence) { + try { + // Use JSON.parse on the smallest possible valid JSON string + // to robustly unescape the sequence. + xml += JSON.parse('"' + escapeSequence + '"') + } catch (e) { + // Fallback for incomplete escape sequences at the end of a chunk. + xml += escapeSequence + } + state.cursor += escapeSequence.length + } else { + // Incomplete escape sequence (e.g., `\` at the end of a chunk). + // Stop processing this chunk and wait for the next one. 
+ return xml + } + } else if (char === '"') { + // End of string value. + state.inString = false + state.isStreamingStringValue = false + const parent = state.bracketStack[state.bracketStack.length - 1] + if (parent === "{") { + const tag = state.xmlTagStack.pop()! + if (tag) { + xml += `${this.onCloseTag(tag, toolName)}` + } + } else if (parent === "[") { + // For array elements, close the current tag and prepare for next element + const arrayElementTag = state.xmlTagStack[state.xmlTagStack.length - 1] + if (arrayElementTag) { + xml += `${this.onCloseTag(arrayElementTag, toolName)}` + } + } + state.parserState = ParserState.EXPECT_COMMA_OR_CLOSING + state.cursor++ // Consume the quote + } else { + // Regular character in a string, output directly. + xml += char + state.cursor++ + } + } else { + // --- Buffering Logic for String Keys --- + if (char === "\\" && !state.isEscaped) { + state.currentString += "\\" + state.isEscaped = true + } else if (char === '"' && !state.isEscaped) { + state.inString = false + let finalString + try { + finalString = JSON.parse('"' + state.currentString + '"') + } catch (e) { + finalString = state.currentString + } + + // This must be a key, because values are streamed. + state.xmlTagStack.push(finalString) + // Don't output the opening tag yet - wait to see if this is an array + state.parserState = ParserState.EXPECT_COLON + state.currentString = "" + } else { + state.currentString += char + state.isEscaped = false + } + state.cursor++ + } + continue + } + + if (/\s/.test(char)) { + state.cursor++ + continue + } + + // Handle primitives - accumulate characters until we hit a delimiter + if (state.parserState === ParserState.EXPECT_VALUE) { + // Check if this character could be part of a primitive value + if ( + (char >= "0" && char <= "9") || + char === "-" || + char === "." 
|| + (char >= "a" && char <= "z") || + (char >= "A" && char <= "Z") + ) { + // Accumulate the character + state.primitiveBuffer += char + state.cursor++ + continue + } else if (state.primitiveBuffer.length > 0) { + // We've hit a delimiter, check if we have a complete primitive + const value = state.primitiveBuffer.trim() + if (value === "true" || value === "false" || value === "null" || /^-?\d+(\.\d+)?$/.test(value)) { + // We have a valid primitive + const parent = state.bracketStack[state.bracketStack.length - 1] + if (parent === "[") { + // For array elements + const arrayElementTag = state.xmlTagStack[state.xmlTagStack.length - 1] + if (arrayElementTag) { + const xmlLevel = Math.max(0, state.bracketStack.filter((b) => b === "{").length - 1) + xml += `${this.getIndent(xmlLevel)}${this.onOpenTag(arrayElementTag, toolName)}${value}${this.onCloseTag(arrayElementTag, toolName)}` + } + } else { + // For object properties + const tag = state.xmlTagStack.pop()! + if (tag) { + xml += `${value}${this.onCloseTag(tag, toolName)}` + } + } + state.parserState = ParserState.EXPECT_COMMA_OR_CLOSING + state.primitiveBuffer = "" + // Don't increment cursor - let the delimiter be processed in the switch + continue + } else { + // Invalid primitive, reset buffer and continue + state.primitiveBuffer = "" + } + } + } + + switch (char) { + case "{": + if ( + state.parserState === ParserState.EXPECT_VALUE || + state.parserState === ParserState.EXPECT_ROOT + ) { + const parent = state.bracketStack[state.bracketStack.length - 1] + if (parent === "[") { + // For an object inside an array, we need to add the repeating tag. 
+ const arrayElementTag = state.xmlTagStack[state.xmlTagStack.length - 1] + if (arrayElementTag) { + // Array elements should be at the same level as their array key + // XML level = containing object level + const xmlLevel = Math.max(0, state.bracketStack.filter((b) => b === "{").length - 1) + xml += `${this.getIndent(xmlLevel)}${this.onOpenTag(arrayElementTag, toolName)}` + } + } + state.bracketStack.push("{") + state.parserState = ParserState.EXPECT_KEY + xml += "\n" + // Any value inside an array consumes the "justOpenedArray" state. + state.justOpenedArray = false + } + break + case "}": + if ( + state.parserState === ParserState.EXPECT_KEY || + state.parserState === ParserState.EXPECT_COMMA_OR_CLOSING + ) { + const parentBeforePop = state.bracketStack[state.bracketStack.length - 1] + state.bracketStack.pop() // Pop '{' + const parentAfterPop = state.bracketStack[state.bracketStack.length - 1] + + if (parentBeforePop === "{" && parentAfterPop === "[") { + // Closing an object that is inside an array. + const arrayElementTag = state.xmlTagStack[state.xmlTagStack.length - 1] + if (arrayElementTag) { + const xmlLevel = Math.max(0, state.bracketStack.filter((b) => b === "{").length - 1) + xml += `${this.getIndent(xmlLevel)}${this.onCloseTag(arrayElementTag, toolName)}` + } + // Don't pop from xmlTagStack - we need to reuse the array element tag + } else { + // Normal object closure. + const tag = state.xmlTagStack.pop()! 
+ if (tag) { + const xmlLevel = Math.max(0, state.bracketStack.filter((b) => b === "{").length - 1) + xml += `${this.getIndent(xmlLevel)}${this.onCloseTag(tag, toolName)}` + } + } + state.parserState = ParserState.EXPECT_COMMA_OR_CLOSING + } + break + case "[": + if ( + state.parserState === ParserState.EXPECT_VALUE || + state.parserState === ParserState.EXPECT_ROOT + ) { + state.bracketStack.push("[") + state.parserState = ParserState.EXPECT_VALUE // An array contains values + state.justOpenedArray = true + // Don't add anything to xmlTagStack here - wait for the actual array elements + } + break + case "]": + if ( + state.parserState === ParserState.EXPECT_VALUE || // handles empty array e.g. [] + state.parserState === ParserState.EXPECT_COMMA_OR_CLOSING + ) { + // If this is an empty array (we just opened it and immediately closing), output empty tag pair + if ( + state.parserState === ParserState.EXPECT_VALUE && + state.justOpenedArray && + state.xmlTagStack.length > 0 + ) { + const tag = state.xmlTagStack[state.xmlTagStack.length - 1] + if (tag) { + const xmlLevel = Math.max(0, state.bracketStack.filter((b) => b === "{").length - 1) + xml += `${this.getIndent(xmlLevel)}${this.onOpenTag(tag, toolName)}${this.onCloseTag(tag, toolName)}` + } + } + + state.bracketStack.pop() // Pop '[' + // For arrays, we keep the tag on the stack for reuse, but only pop it when we close the array + if (state.xmlTagStack.length > 0) { + state.xmlTagStack.pop() // Pop the array's tag name, its job is done. + } + state.parserState = ParserState.EXPECT_COMMA_OR_CLOSING + state.justOpenedArray = false + } + break + case '"': + if (state.parserState === ParserState.EXPECT_VALUE) { + // We've encountered the start of a string that is a JSON value. 
+ state.isStreamingStringValue = true + state.inString = true + // If we're in an array, we need to open a tag for this array element + const parent = state.bracketStack[state.bracketStack.length - 1] + if (parent === "[") { + const arrayElementTag = state.xmlTagStack[state.xmlTagStack.length - 1] + if (arrayElementTag) { + const xmlLevel = Math.max(0, state.bracketStack.filter((b) => b === "{").length - 1) + xml += `${this.getIndent(xmlLevel)}${this.onOpenTag(arrayElementTag, toolName)}` + } + } + } else if (state.parserState === ParserState.EXPECT_KEY) { + // This is the start of a string that is a JSON key. + state.isStreamingStringValue = false + state.inString = true + } + break + case ":": + if (state.parserState === ParserState.EXPECT_COLON) { + // Look ahead to see if this is an array or a regular value + let nextNonWhitespace = "" + for (let i = state.cursor + 1; i < args.length; i++) { + if (!/\s/.test(args[i])) { + nextNonWhitespace = args[i] + break + } + } + + // If the next non-whitespace character is not '[', output the opening tag now + if (nextNonWhitespace !== "[") { + const tag = state.xmlTagStack[state.xmlTagStack.length - 1] + if (tag) { + // For regular object properties, calculate XML indentation level + // XML level = JSON object nesting - 1 (since root tool object doesn't count) + const xmlLevel = Math.max(0, state.bracketStack.filter((b) => b === "{").length - 1) + xml += `${this.getIndent(xmlLevel)}${this.onOpenTag(tag, toolName)}` + } + } + + state.parserState = ParserState.EXPECT_VALUE + } + break + case ",": + if (state.parserState === ParserState.EXPECT_COMMA_OR_CLOSING) { + const parent = state.bracketStack[state.bracketStack.length - 1] + state.parserState = parent === "{" ? ParserState.EXPECT_KEY : ParserState.EXPECT_VALUE + } + break + } + state.cursor++ + } + return xml + } + + /** + * Extracts a complete JSON escape sequence from a string, starting at a given position. + * @param str - The string containing the escape sequence. 
+ * @param pos - The starting position of the backslash. + * @returns The full escape sequence (e.g., "\\n", "\\uABCD") or null if incomplete. + */ + private getFullEscapeSequence(str: string, pos: number): string | null { + if (pos < 0 || str[pos] !== "\\") { + return null + } + // If the backslash is the last character, we need more data. + if (pos + 1 >= str.length) { + return null + } + const nextChar = str[pos + 1] + if (nextChar === "u") { + // A unicode escape sequence requires 4 hex digits. + if (pos + 5 >= str.length) { + return null // Incomplete unicode sequence. + } + const hex = str.substring(pos + 2, pos + 6) + // Basic validation for hex characters. + if (/^[0-9a-fA-F]{4}$/.test(hex)) { + return "\\u" + hex + } + return null + } + // For simple escapes like \n, \", \\, etc. + return str.substring(pos, pos + 2) + } + + /** + * Attempts to construct a temporarily valid JSON string from the current streaming buffer and parser state, + * allowing JSON.parse to succeed and provide a usable anthropicContent.input during partial tool call streaming. + * This function does NOT mutate the original parser state; it operates only on copies. + * + * Implementation details: + * - If currently parsing a string, closes the string with a quote and removes incomplete escape/unicode sequences. + * - If a primitive value (true/false/null/number) is incomplete, auto-completes it to a valid JSON token. + * - Closes all unclosed object/array brackets, inserting "null" where a value is expected. + * - Removes trailing commas before closing brackets to avoid JSON syntax errors. + * - On initial parse failure, tries to append "null" or repeatedly trim trailing commas and retries parsing. + * - Only used for constructing intermediate JSON during streaming; final result should use fully parsed content. 
+ */ + private tryBuildTemporaryJson(state: ToolCallProcessingState, rawArgs: string): any | null { + let s = rawArgs + + if (!s || s.trim().length === 0) { + return null + } + + const trimTrailingComma = (str: string): string => str.replace(/,(\s*)$/, "$1") + + const completePrimitiveSuffix = (pb: string): string => { + // Complete booleans/null prefixes + if (/^(t|tr|tru)$/.test(pb)) return "e" // true + if (/^(f|fa|fal|fals)$/.test(pb)) return "e" // false + if (/^(n|nu|nul)$/.test(pb)) return "l" // null + // Complete numeric partials like "-" or "12." + if (/^-?$/.test(pb)) return "0" + if (/^-?\d+\.$/.test(pb)) return "0" + return "" + } + + const stripIncompleteUnicodeAtEnd = (input: string): string => { + const uniIndex = input.lastIndexOf("\\u") + if (uniIndex !== -1) { + const tail = input.slice(uniIndex + 2) + if (!/^[0-9a-fA-F]{4}$/.test(tail)) { + return input.slice(uniIndex) ? input.slice(0, uniIndex) : input + } + } + return input + } + + // 1) Handle in-flight strings + if (state.inString) { + // Drop dangling backslash to avoid invalid escape at buffer end + if (s.endsWith("\\")) s = s.slice(0, -1) + // Trim incomplete unicode escape (e.g. \u12) + s = stripIncompleteUnicodeAtEnd(s) + // Close the string + s += `"` + } else { + // 2) Not inside a string; if a primitive token is partially accumulated, try to complete it minimally + if (state.primitiveBuffer && state.primitiveBuffer.length > 0) { + const suffix = completePrimitiveSuffix(state.primitiveBuffer) + if (suffix) s += suffix + } + } + + // 3) Before closing brackets, remove trailing commas to avoid JSON syntax errors + s = trimTrailingComma(s) + + // 4) Close any open objects/arrays per the current stack + if (state.bracketStack.length > 0) { + for (let i = state.bracketStack.length - 1; i >= 0; i--) { + // Always ensure no trailing comma before we append a closer + s = trimTrailingComma(s) + + const b = state.bracketStack[i] + + s += b === "{" ? 
"}" : "]" + } + } + + // 5) First parse attempt + try { + return JSON.parse(s) + } catch { + // 6) Second attempt: add one more null if still dangling and retry + try { + let s2 = s + const lastNonWs = this.findLastNonWhitespaceChar(s2) + if (lastNonWs === ":" || state.parserState === ParserState.EXPECT_VALUE) { + s2 += "null" + } + s2 = trimTrailingComma(s2) + return JSON.parse(s2) + } catch { + // 7) Final fallback: repeatedly trim trailing commas and retry + let s3 = s + for (let k = 0; k < 3; k++) { + const trimmed = trimTrailingComma(s3) + if (trimmed === s3) break + s3 = trimmed + try { + return JSON.parse(s3) + } catch { + // continue + } + } + return null + } + } + } + + private findLastNonWhitespaceChar(str: string): string { + for (let i = str.length - 1; i >= 0; i--) { + const ch = str[i] + if (!/\s/.test(ch)) return ch + } + return "" + } + + private onOpenTag(tag: string, toolName: string): string { + return `<${tag}>` + } + + private onCloseTag(tag: string, toolName: string): string { + return `\n` + } +} + +/** + * A handler function that uses the StreamingToolCallProcessor to process streaming tool calls. + * @param processor - An instance of StreamingToolCallProcessor. + * @param chunk - The tool call chunk to process. + * @param providerType - The type of tool call provider (e.g., OpenAI). + * @returns The generated XML string. 
+ */ +export const handleOpenaiToolCallStreaming = ( + processor: StreamingToolCallProcessor, + chunk: any, + providerType: ToolCallProviderType, +): ToolCallParam => { + return processor.processChunkTool(chunk, providerType) +} diff --git a/src/core/tools/__tests__/applyDiffTool.tool-call.spec.ts b/src/core/tools/__tests__/applyDiffTool.tool-call.spec.ts new file mode 100644 index 000000000000..538102ae7294 --- /dev/null +++ b/src/core/tools/__tests__/applyDiffTool.tool-call.spec.ts @@ -0,0 +1,110 @@ +// Import after mocking to get the mocked version +import { applyDiffToolLegacy } from "../applyDiffTool" + +describe("applyDiffTool tool call parsing", () => { + let mockCline: any + let mockBlock: any + let mockAskApproval: any + let mockHandleError: any + let mockPushToolResult: any + let mockRemoveClosingTag: any + + beforeEach(() => { + vi.clearAllMocks() + + mockCline = { + cwd: "/test", + diffStrategy: { + applyDiff: vi.fn().mockResolvedValue({ success: true, content: "file content" }), + getProgressStatus: vi.fn(), + }, + diffViewProvider: { + reset: vi.fn(), + open: vi.fn(), + update: vi.fn(), + scrollToFirstDiff: vi.fn(), + saveChanges: vi.fn(), + pushToolWriteResult: vi.fn(), + }, + api: { + getModel: vi.fn().mockReturnValue({ id: "test-model" }), + }, + apiConfiguration: { + toolCallEnabled: true, + }, + consecutiveMistakeCount: 0, + recordToolError: vi.fn(), + sayAndCreateMissingParamError: vi.fn(), + fileContextTracker: { + trackFileContext: vi.fn(), + }, + didEditFile: false, + providerRef: { + deref: () => ({ + getState: async () => ({}), + }), + }, + } + mockBlock = { + params: { path: "test.ts" }, + toolUseParam: { + input: { + diff: [ + { + d1: 10, + d2: "search content", + d3: "replace content", + }, + ], + }, + }, + toolUseId: "test-1", + partial: false, + } + mockAskApproval = vi.fn().mockResolvedValue(true) + mockHandleError = vi.fn() + mockPushToolResult = vi.fn() + mockRemoveClosingTag = vi.fn((tag, value) => value) + + // Mock file system 
checks + vi.mock("fs/promises", () => ({ + readFile: vi.fn().mockResolvedValue("file content"), + default: { + readFile: vi.fn().mockResolvedValue("file content"), + }, + })) + vi.mock("../../../utils/fs", () => ({ + fileExistsAtPath: vi.fn().mockResolvedValue(true), + })) + }) + + it("should format diffContent from toolUseParam when toolCallEnabled is true", async () => { + await applyDiffToolLegacy( + mockCline, + mockBlock, + mockAskApproval, + mockHandleError, + mockPushToolResult, + mockRemoveClosingTag, + ) + + const expectedDiffContent = `<<<<<<< SEARCH\n:start_line:10\n-------\nsearch content\n=======\nreplace content\n>>>>>>> REPLACE\n\n` + expect(mockBlock.params.diff).toEqual(expectedDiffContent) + }) + + it("should not modify diffContent if toolUseParam.input.diff is missing or empty", async () => { + mockBlock.toolUseParam.input = {} + mockBlock.params.diff = "original diff" + + await applyDiffToolLegacy( + mockCline, + mockBlock, + mockAskApproval, + mockHandleError, + mockPushToolResult, + mockRemoveClosingTag, + ) + + expect(mockBlock.params.diff).toEqual("original diff") + }) +}) diff --git a/src/core/tools/__tests__/attemptCompletionTool.spec.ts b/src/core/tools/__tests__/attemptCompletionTool.spec.ts index fcad4d5f4925..94610385f09b 100644 --- a/src/core/tools/__tests__/attemptCompletionTool.spec.ts +++ b/src/core/tools/__tests__/attemptCompletionTool.spec.ts @@ -6,6 +6,7 @@ import { AttemptCompletionToolUse } from "../../../shared/tools" vi.mock("../../prompts/responses", () => ({ formatResponse: { toolError: vi.fn((msg: string) => `Error: ${msg}`), + imageBlocks: vi.fn(() => []), }, })) @@ -25,9 +26,18 @@ vi.mock("../../../shared/package", () => ({ }, })) +vi.mock("@roo-code/telemetry", () => ({ + TelemetryService: { + instance: { + captureTaskCompleted: vi.fn(), + }, + }, +})) + import { attemptCompletionTool } from "../attemptCompletionTool" import { Task } from "../../task/Task" import * as vscode from "vscode" +import { TelemetryService } 
from "@roo-code/telemetry" describe("attemptCompletionTool", () => { let mockTask: Partial @@ -409,4 +419,56 @@ describe("attemptCompletionTool", () => { ) }) }) + + describe("when toolCallEnabled is true", () => { + it('should push a "tool_result" message when toolCallEnabled is true and toolUseId is present', async () => { + mockTask = { + consecutiveMistakeCount: 0, + recordToolError: vi.fn(), + todoList: undefined, + apiConfiguration: { + toolCallEnabled: true, + }, + userMessageContent: [], + clineMessages: [], + say: vi.fn(), + getTokenUsage: vi.fn(), + emit: vi.fn(), + } + mockTask.ask = vi.fn().mockResolvedValue({ response: "", text: "success", images: [] }) + const block: AttemptCompletionToolUse = { + type: "tool_use", + name: "attempt_completion", + params: { result: "Task completed successfully" }, + partial: false, + toolUseId: "tool-use-id-123", + } + mockToolDescription.mockReturnValue("ToolDescription") + + await attemptCompletionTool( + mockTask as Task, + block, + mockAskApproval, + mockHandleError, + mockPushToolResult, + mockRemoveClosingTag, + mockToolDescription, + mockAskFinishSubTaskApproval, + ) + + expect(mockTask.userMessageContent).toEqual([ + { + type: "tool_result", + tool_use_id: "tool-use-id-123", + content: [ + { type: "text", text: "ToolDescription Result:" }, + { + type: "text", + text: "The user has provided feedback on the results. 
Consider their input to continue the task, and then attempt completion again.\n\nsuccess\n", + }, + ], + }, + ]) + }) + }) }) diff --git a/src/core/tools/__tests__/multiApplyDiffTool.spec.ts b/src/core/tools/__tests__/multiApplyDiffTool.spec.ts index 0bdedb9cd563..223bfc5423b4 100644 --- a/src/core/tools/__tests__/multiApplyDiffTool.spec.ts +++ b/src/core/tools/__tests__/multiApplyDiffTool.spec.ts @@ -398,4 +398,124 @@ new content expect(mockHandleError).not.toHaveBeenCalled() }) }) + describe("when tool call is enabled", () => { + beforeEach(() => { + mockProvider.getState.mockResolvedValue({ + experiments: { + [EXPERIMENT_IDS.MULTI_FILE_APPLY_DIFF]: true, + }, + apiConfiguration: { + toolCallEnabled: true, + }, + diagnosticsEnabled: true, + writeDelayMs: 0, + }) + }) + it("should handle diff with content", async () => { + mockBlock = { + params: {}, + toolUseParam: { + input: { + args: { + file: [ + { + path: "test.ts", + diff: [ + { + content: + "<<<<<<< SEARCH\nold content\n=======\nnew content\n>>>>>>> REPLACE", + }, + ], + }, + ], + }, + }, + }, + partial: false, + } + + await applyDiffTool( + mockCline, + mockBlock, + mockAskApproval, + mockHandleError, + mockPushToolResult, + mockRemoveClosingTag, + ) + + expect(mockPushToolResult).toHaveBeenCalled() + expect(mockHandleError).not.toHaveBeenCalled() + }) + + it("should handle diff with search and replace", async () => { + mockBlock = { + params: {}, + toolUseParam: { + input: { + args: { + file: [ + { + path: "test.ts", + diff: [ + { + search: "old content", + replace: "new content", + }, + ], + }, + ], + }, + }, + }, + partial: false, + } + + await applyDiffTool( + mockCline, + mockBlock, + mockAskApproval, + mockHandleError, + mockPushToolResult, + mockRemoveClosingTag, + ) + + expect(mockPushToolResult).toHaveBeenCalled() + expect(mockHandleError).not.toHaveBeenCalled() + }) + + it("should handle diff with search but no replace", async () => { + mockBlock = { + params: {}, + toolUseParam: { + input: { + 
args: { + file: [ + { + path: "test.ts", + diff: [ + { + search: "old content", + }, + ], + }, + ], + }, + }, + }, + + partial: false, + } + + await applyDiffTool( + mockCline, + mockBlock, + mockAskApproval, + mockHandleError, + mockPushToolResult, + mockRemoveClosingTag, + ) + + expect(mockPushToolResult).toHaveBeenCalled() + }) + }) }) diff --git a/src/core/tools/applyDiffTool.ts b/src/core/tools/applyDiffTool.ts index 903e3c846ecf..456782fe6d2f 100644 --- a/src/core/tools/applyDiffTool.ts +++ b/src/core/tools/applyDiffTool.ts @@ -25,6 +25,37 @@ export async function applyDiffToolLegacy( const relPath: string | undefined = block.params.path let diffContent: string | undefined = block.params.diff + if (block.toolUseId && block.toolUseParam?.input) { + const input = block.toolUseParam?.input as any + const diffs = Array.isArray(input?.diff) ? input?.diff : undefined + if (diffs && diffs.length > 0) { + let tmpDiff = "" + for (let i = 0; i < diffs.length; i++) { + const diff = diffs[i] + const startLine = diff?.d1 + const search = diff?.d2 + const replace = diff?.d3 + if (i > 0) { + tmpDiff += "\n>>>>>>> REPLACE\n\n" + } + if (startLine) { + tmpDiff += `<<<<<<< SEARCH\n:start_line:${startLine}` + } + if (startLine && search) { + tmpDiff += `\n-------\n${search}` + } + if (startLine && search && replace) { + tmpDiff += `\n=======\n${replace}` + } + } + if (!block.partial) { + tmpDiff += "\n>>>>>>> REPLACE\n\n" + } + diffContent = tmpDiff + block.params.diff = diffContent + } + } + if (diffContent && !cline.api.getModel().id.includes("claude")) { diffContent = unescapeHtmlEntities(diffContent) } diff --git a/src/core/tools/attemptCompletionTool.ts b/src/core/tools/attemptCompletionTool.ts index 5074d7f4e808..8e665e9bc0fe 100644 --- a/src/core/tools/attemptCompletionTool.ts +++ b/src/core/tools/attemptCompletionTool.ts @@ -129,8 +129,20 @@ export async function attemptCompletionTool( }) toolResults.push(...formatResponse.imageBlocks(images)) - 
cline.userMessageContent.push({ type: "text", text: `${toolDescription()} Result:` }) - cline.userMessageContent.push(...toolResults) + + const newMessage: (Anthropic.TextBlockParam | Anthropic.ImageBlockParam)[] = [ + { type: "text", text: `${toolDescription()} Result:` }, + ...toolResults, + ] + if (block.toolUseId) { + cline.userMessageContent.push({ + type: "tool_result", + tool_use_id: block.toolUseId, + content: newMessage, + }) + } else { + cline.userMessageContent.push(...newMessage) + } return } diff --git a/src/core/tools/multiApplyDiffTool.ts b/src/core/tools/multiApplyDiffTool.ts index 50695b1da73e..4aae13473d3d 100644 --- a/src/core/tools/multiApplyDiffTool.ts +++ b/src/core/tools/multiApplyDiffTool.ts @@ -12,7 +12,7 @@ import { formatResponse } from "../prompts/responses" import { fileExistsAtPath } from "../../utils/fs" import { RecordSource } from "../context-tracking/FileContextTrackerTypes" import { unescapeHtmlEntities } from "../../utils/text-normalization" -import { parseXmlForDiff } from "../../utils/xml" +import { parseXml, parseXmlForDiff } from "../../utils/xml" import { EXPERIMENT_IDS, experiments } from "../../shared/experiments" import { applyDiffToolLegacy } from "./applyDiffTool" @@ -108,11 +108,18 @@ export async function applyDiffTool( if (argsXmlTag) { // Parse file entries from XML (new way) try { - // IMPORTANT: We use parseXmlForDiff here instead of parseXml to prevent HTML entity decoding - // This ensures exact character matching when comparing parsed content against original file content - // Without this, special characters like & would be decoded to & causing diff mismatches - const parsed = parseXmlForDiff(argsXmlTag, ["file.diff.content"]) as ParsedXmlResult - const files = Array.isArray(parsed.file) ? 
parsed.file : [parsed.file].filter(Boolean) + let files = [] as any[] + if (!block.toolUseId) { + // IMPORTANT: We use parseXmlForDiff here instead of parseXml to prevent HTML entity decoding + // This ensures exact character matching when comparing parsed content against original file content + // Without this, special characters like & would be decoded to & causing diff mismatches + const parsed = parseXmlForDiff(argsXmlTag, ["file.diff.content"]) as ParsedXmlResult + files = Array.isArray(parsed.file) ? parsed.file : [parsed.file].filter(Boolean) + } else { + const input = block.toolUseParam?.input as any + const args = input.args + files = Array.isArray(args?.file) ? args?.file : [args?.file].filter(Boolean) + } for (const file of files) { if (!file.path || !file.diff) continue @@ -135,8 +142,25 @@ export async function applyDiffTool( let diffContent: string let startLine: number | undefined - // Ensure content is a string before storing it - diffContent = typeof diff.content === "string" ? diff.content : "" + if (block.toolUseId) { + if (!diff.content) { + const search = diff?.search + const replace = diff?.replace + diffContent = "" + if (search) { + diffContent += `>>>>>> REPLACE\n]]>` + } + } else { + const content = diff.content + diffContent = `` + } + } else { + // Ensure content is a string before storing it + diffContent = typeof diff.content === "string" ? diff.content : "" + } startLine = diff.start_line ? 
parseInt(diff.start_line) : undefined // Only add to operations if we have valid content diff --git a/src/core/tools/writeToFileTool.ts b/src/core/tools/writeToFileTool.ts index e82eab92bc89..087330c24e52 100644 --- a/src/core/tools/writeToFileTool.ts +++ b/src/core/tools/writeToFileTool.ts @@ -152,7 +152,12 @@ export async function writeToFileTool( pushToolResult( formatResponse.toolError( - formatResponse.lineCountTruncationError(actualLineCount, isNewFile, diffStrategyEnabled), + formatResponse.lineCountTruncationError( + actualLineCount, + isNewFile, + diffStrategyEnabled, + cline?.apiConfiguration?.toolCallEnabled === true, + ), ), ) await cline.diffViewProvider.revertChanges() diff --git a/src/core/webview/generateSystemPrompt.ts b/src/core/webview/generateSystemPrompt.ts index a639d8a9606f..e95d56a0cd1c 100644 --- a/src/core/webview/generateSystemPrompt.ts +++ b/src/core/webview/generateSystemPrompt.ts @@ -9,6 +9,7 @@ import { MultiSearchReplaceDiffStrategy } from "../diff/strategies/multi-search- import { MultiFileSearchReplaceDiffStrategy } from "../diff/strategies/multi-file-search-replace" import { ClineProvider } from "./ClineProvider" +import { supportToolCall } from "../../shared/tools" export const generateSystemPrompt = async (provider: ClineProvider, message: WebviewMessage) => { const { @@ -84,6 +85,8 @@ export const generateSystemPrompt = async (provider: ClineProvider, message: Web { maxConcurrentFileReads: maxConcurrentFileReads ?? 5, todoListEnabled: apiConfiguration?.todoListEnabled ?? true, + toolCallEnabled: + (apiConfiguration?.toolCallEnabled ?? false) && supportToolCall(apiConfiguration.apiProvider), useAgentRules: vscode.workspace.getConfiguration("roo-cline").get("useAgentRules") ?? 
true, newTaskRequireTodos: vscode.workspace .getConfiguration("roo-cline") diff --git a/src/shared/tools.ts b/src/shared/tools.ts index 608b50752e7d..b182950b8d42 100644 --- a/src/shared/tools.ts +++ b/src/shared/tools.ts @@ -77,8 +77,19 @@ export interface ToolUse { // params is a partial record, allowing only some or none of the possible parameters to be used params: Partial> partial: boolean + toolUseId?: string // only toolCallEnabled=true + toolUseParam?: Anthropic.ToolUseBlockParam } +export const supportToolCall = (provider: string | null | undefined) => { + if (provider === null || provider === undefined) { + return false + } + return ["openrouter", "openai", "deepseek", "lmstudio", "roo"].includes(provider) +} + +export type ToolCallProviderType = "openai" | "anthropic" + export interface ExecuteCommandToolUse extends ToolUse { name: "execute_command" // Pick, "command"> makes "command" required, but Partial<> makes it optional diff --git a/webview-ui/src/components/settings/ApiOptions.tsx b/webview-ui/src/components/settings/ApiOptions.tsx index 7f2ac4ed7a34..3f4b3a3e4c51 100644 --- a/webview-ui/src/components/settings/ApiOptions.tsx +++ b/webview-ui/src/components/settings/ApiOptions.tsx @@ -105,11 +105,13 @@ import { ThinkingBudget } from "./ThinkingBudget" import { Verbosity } from "./Verbosity" import { DiffSettingsControl } from "./DiffSettingsControl" import { TodoListSettingsControl } from "./TodoListSettingsControl" +import { ToolCallSettingsControl } from "./ToolCallSettingsControl" import { TemperatureControl } from "./TemperatureControl" import { RateLimitSecondsControl } from "./RateLimitSecondsControl" import { ConsecutiveMistakeLimitControl } from "./ConsecutiveMistakeLimitControl" import { BedrockCustomArn } from "./providers/BedrockCustomArn" import { buildDocLink } from "@src/utils/docLinks" +import { supportToolCall } from "@roo/tools" export interface ApiOptionsProps { uriScheme: string | undefined @@ -421,6 +423,9 @@ const ApiOptions = 
({ })) }, [organizationAllowList, apiConfiguration.apiProvider]) + const enableToolCall = useMemo(() => { + return supportToolCall(selectedProvider) + }, [selectedProvider]) return (
@@ -760,6 +765,12 @@ const ApiOptions = ({ todoListEnabled={apiConfiguration.todoListEnabled} onChange={(field, value) => setApiConfigurationField(field, value)} /> + {enableToolCall && ( + setApiConfigurationField(field, value)} + /> + )} (({ onDone, t const handleSubmit = () => { if (isSettingValid) { + // Check if provider supports tool calls, if not, set toolCallEnabled to false + if (!supportToolCall(apiConfiguration?.apiProvider)) { + apiConfiguration!.toolCallEnabled = false + } + vscode.postMessage({ type: "language", text: language }) vscode.postMessage({ type: "alwaysAllowReadOnly", bool: alwaysAllowReadOnly }) vscode.postMessage({ diff --git a/webview-ui/src/components/settings/ToolCallSettingsControl.tsx b/webview-ui/src/components/settings/ToolCallSettingsControl.tsx new file mode 100644 index 000000000000..d91fec039258 --- /dev/null +++ b/webview-ui/src/components/settings/ToolCallSettingsControl.tsx @@ -0,0 +1,35 @@ +import React, { useCallback } from "react" +import { useAppTranslation } from "@/i18n/TranslationContext" +import { VSCodeCheckbox } from "@vscode/webview-ui-toolkit/react" + +interface ToolCallSettingsControlProps { + toolCallEnabled?: boolean + onChange: (field: "toolCallEnabled", value: any) => void +} + +export const ToolCallSettingsControl: React.FC = ({ + toolCallEnabled = false, + onChange, +}) => { + const { t } = useAppTranslation() + + const handleToolCallEnabledChange = useCallback( + (e: any) => { + onChange("toolCallEnabled", e.target.checked) + }, + [onChange], + ) + + return ( +
+
+ + {t("settings:advanced.toolCall.label")} + +
+ {t("settings:advanced.toolCall.description")} +
+
+
+ ) +} diff --git a/webview-ui/src/i18n/locales/ca/settings.json b/webview-ui/src/i18n/locales/ca/settings.json index d785ed5fe0b6..cb85105634de 100644 --- a/webview-ui/src/i18n/locales/ca/settings.json +++ b/webview-ui/src/i18n/locales/ca/settings.json @@ -685,6 +685,10 @@ "todoList": { "label": "Habilitar eina de llista de tasques", "description": "Quan està habilitat, Roo pot crear i gestionar llistes de tasques per fer el seguiment del progrés de les tasques. Això ajuda a organitzar tasques complexes en passos manejables." + }, + "toolCall": { + "label": "Habilitar crides d'eines", + "description": "Quan està habilitat, Roo pot utilitzar crides d'eines en lloc de prompts de crides d'eines basats en XML. Funcionalitat experimental!!!" } }, "experimental": { diff --git a/webview-ui/src/i18n/locales/de/settings.json b/webview-ui/src/i18n/locales/de/settings.json index 6648b6e6702e..4b2b2fd7a800 100644 --- a/webview-ui/src/i18n/locales/de/settings.json +++ b/webview-ui/src/i18n/locales/de/settings.json @@ -685,6 +685,10 @@ "todoList": { "label": "Todo-Listen-Tool aktivieren", "description": "Wenn aktiviert, kann Roo Todo-Listen erstellen und verwalten, um den Aufgabenfortschritt zu verfolgen. Dies hilft, komplexe Aufgaben in überschaubare Schritte zu organisieren." + }, + "toolCall": { + "label": "Tool-Aufrufe aktivieren", + "description": "Wenn aktiviert, kann Roo Tool-Aufrufe anstelle von XML-basierten Tool-Aufruf-Prompts verwenden. Experimentelle Funktion!!!" } }, "experimental": { diff --git a/webview-ui/src/i18n/locales/en/settings.json b/webview-ui/src/i18n/locales/en/settings.json index 1cb4b144f702..18f7e7228310 100644 --- a/webview-ui/src/i18n/locales/en/settings.json +++ b/webview-ui/src/i18n/locales/en/settings.json @@ -684,6 +684,10 @@ "todoList": { "label": "Enable todo list tool", "description": "When enabled, Roo can create and manage todo lists to track task progress. This helps organize complex tasks into manageable steps." 
+ }, + "toolCall": { + "label": "Enable tool calling", + "description": "When enabled, Roo can use native tool calls instead of the XML-based tool-calling prompt. Experimental!!!" } }, "experimental": { diff --git a/webview-ui/src/i18n/locales/es/settings.json b/webview-ui/src/i18n/locales/es/settings.json index c1174cbf0f89..476b9a4b3e0b 100644 --- a/webview-ui/src/i18n/locales/es/settings.json +++ b/webview-ui/src/i18n/locales/es/settings.json @@ -685,6 +685,10 @@ "todoList": { "label": "Habilitar herramienta de lista de tareas", "description": "Cuando está habilitado, Roo puede crear y gestionar listas de tareas para hacer seguimiento del progreso. Esto ayuda a organizar tareas complejas en pasos manejables." + }, + "toolCall": { + "label": "Habilitar llamadas de herramientas", + "description": "Cuando está habilitado, Roo puede usar llamadas de herramientas en lugar de prompts de llamada de herramientas basados en XML. ¡¡¡Función experimental!!!" } }, "experimental": { diff --git a/webview-ui/src/i18n/locales/fr/settings.json b/webview-ui/src/i18n/locales/fr/settings.json index 5cfd4d005ff2..340a6e89e810 100644 --- a/webview-ui/src/i18n/locales/fr/settings.json +++ b/webview-ui/src/i18n/locales/fr/settings.json @@ -685,6 +685,10 @@ "todoList": { "label": "Activer l'outil de liste de tâches", "description": "Lorsqu'activé, Roo peut créer et gérer des listes de tâches pour suivre la progression. Cela aide à organiser les tâches complexes en étapes gérables." + }, + "toolCall": { + "label": "Activer l'appel d'outils", + "description": "Lorsqu'activé, Roo peut utiliser l'appel d'outils au lieu des invites d'appel d'outils basées sur XML. Fonctionnalité expérimentale !!!" 
} }, "experimental": { diff --git a/webview-ui/src/i18n/locales/hi/settings.json b/webview-ui/src/i18n/locales/hi/settings.json index bb5cf6f6c40d..b963ae2d37d6 100644 --- a/webview-ui/src/i18n/locales/hi/settings.json +++ b/webview-ui/src/i18n/locales/hi/settings.json @@ -686,6 +686,10 @@ "todoList": { "label": "टूडू सूची टूल सक्षम करें", "description": "जब सक्षम हो, तो Roo कार्य प्रगति को ट्रैक करने के लिए टूडू सूचियाँ बना और प्रबंधित कर सकता है। यह जटिल कार्यों को प्रबंधनीय चरणों में व्यवस्थित करने में मदद करता है।" + }, + "toolCall": { + "label": "टूल कॉल सक्षम करें", + "description": "जब सक्षम हो, तो Roo XML-आधारित टूल कॉल प्रॉम्प्ट के बजाय टूल कॉल का उपयोग कर सकता है। प्रायोगिक सुविधा!!!" } }, "experimental": { diff --git a/webview-ui/src/i18n/locales/id/settings.json b/webview-ui/src/i18n/locales/id/settings.json index 93225bab1e31..4bdef2a24dd2 100644 --- a/webview-ui/src/i18n/locales/id/settings.json +++ b/webview-ui/src/i18n/locales/id/settings.json @@ -690,6 +690,10 @@ "todoList": { "label": "Aktifkan alat daftar tugas", "description": "Saat diaktifkan, Roo dapat membuat dan mengelola daftar tugas untuk melacak kemajuan tugas. Ini membantu mengatur tugas kompleks menjadi langkah-langkah yang dapat dikelola." + }, + "toolCall": { + "label": "Aktifkan panggilan alat", + "description": "Saat diaktifkan, Roo dapat menggunakan panggilan alat daripada prompt panggilan alat berbasis XML. Fitur eksperimental!!!" } }, "experimental": { diff --git a/webview-ui/src/i18n/locales/it/settings.json b/webview-ui/src/i18n/locales/it/settings.json index b8487b01dd9c..06fab5626d5c 100644 --- a/webview-ui/src/i18n/locales/it/settings.json +++ b/webview-ui/src/i18n/locales/it/settings.json @@ -686,6 +686,10 @@ "todoList": { "label": "Abilita strumento lista di cose da fare", "description": "Quando abilitato, Roo può creare e gestire liste di cose da fare per tracciare il progresso delle attività. Questo aiuta a organizzare attività complesse in passaggi gestibili." 
+ }, + "toolCall": { + "label": "Abilita chiamate di strumenti", + "description": "Quando abilitato, Roo può usare chiamate di strumenti invece di prompt di chiamata di strumenti basati su XML. Funzionalità sperimentale!!!" } }, "experimental": { diff --git a/webview-ui/src/i18n/locales/ja/settings.json b/webview-ui/src/i18n/locales/ja/settings.json index d8b9d6482f16..847cc32b4ebb 100644 --- a/webview-ui/src/i18n/locales/ja/settings.json +++ b/webview-ui/src/i18n/locales/ja/settings.json @@ -686,6 +686,10 @@ "todoList": { "label": "ToDoリストツールを有効にする", "description": "有効にすると、Rooはタスクの進捗を追跡するためのToDoリストを作成・管理できます。これにより、複雑なタスクを管理しやすいステップに整理できます。" + }, + "toolCall": { + "label": "ツール呼び出しを有効にする", + "description": "有効にすると、RooはXMLベースのツール呼び出しプロンプトの代わりにツール呼び出しを使用できます。実験的機能!!!" } }, "experimental": { diff --git a/webview-ui/src/i18n/locales/ko/settings.json b/webview-ui/src/i18n/locales/ko/settings.json index 6b8cd0d2c98d..69cafae528a0 100644 --- a/webview-ui/src/i18n/locales/ko/settings.json +++ b/webview-ui/src/i18n/locales/ko/settings.json @@ -686,6 +686,10 @@ "todoList": { "label": "할 일 목록 도구 활성화", "description": "활성화하면 Roo가 작업 진행 상황을 추적하기 위한 할 일 목록을 만들고 관리할 수 있습니다. 이는 복잡한 작업을 관리 가능한 단계로 구성하는 데 도움이 됩니다." + }, + "toolCall": { + "label": "도구 호출 활성화", + "description": "활성화하면 Roo가 XML 기반 도구 호출 프롬프트 대신 도구 호출을 사용할 수 있습니다. 실험적 기능!!!" } }, "experimental": { diff --git a/webview-ui/src/i18n/locales/nl/settings.json b/webview-ui/src/i18n/locales/nl/settings.json index 7e9da9b11af1..d57ab0c0a272 100644 --- a/webview-ui/src/i18n/locales/nl/settings.json +++ b/webview-ui/src/i18n/locales/nl/settings.json @@ -686,6 +686,10 @@ "todoList": { "label": "Takenlijst-tool inschakelen", "description": "Wanneer ingeschakeld, kan Roo takenlijsten maken en beheren om de voortgang van taken bij te houden. Dit helpt complexe taken te organiseren in beheersbare stappen." 
+ }, + "toolCall": { + "label": "Tool-aanroepen inschakelen", + "description": "Wanneer ingeschakeld, kan Roo tool-aanroepen gebruiken in plaats van XML-gebaseerde tool-aanroep prompts. Experimentele functie!!!" } }, "experimental": { diff --git a/webview-ui/src/i18n/locales/pl/settings.json b/webview-ui/src/i18n/locales/pl/settings.json index c9aa603d2faa..6de82089d4d5 100644 --- a/webview-ui/src/i18n/locales/pl/settings.json +++ b/webview-ui/src/i18n/locales/pl/settings.json @@ -686,6 +686,10 @@ "todoList": { "label": "Włącz narzędzie listy zadań", "description": "Po włączeniu Roo może tworzyć i zarządzać listami zadań do śledzenia postępu zadań. Pomaga to organizować złożone zadania w łatwe do zarządzania kroki." + }, + "toolCall": { + "label": "Włącz wywołania narzędzi", + "description": "Po włączeniu Roo może używać wywołań narzędzi zamiast opartych na XML promptów wywołania narzędzi. Funkcja eksperymentalna!!!" } }, "experimental": { diff --git a/webview-ui/src/i18n/locales/pt-BR/settings.json b/webview-ui/src/i18n/locales/pt-BR/settings.json index 0fbb47d34875..aa17ef9f7632 100644 --- a/webview-ui/src/i18n/locales/pt-BR/settings.json +++ b/webview-ui/src/i18n/locales/pt-BR/settings.json @@ -686,6 +686,10 @@ "todoList": { "label": "Habilitar ferramenta de lista de tarefas", "description": "Quando habilitado, o Roo pode criar e gerenciar listas de tarefas para acompanhar o progresso das tarefas. Isso ajuda a organizar tarefas complexas em etapas gerenciáveis." + }, + "toolCall": { + "label": "Habilitar chamadas de ferramentas", + "description": "Quando habilitado, o Roo pode usar chamadas de ferramentas em vez de prompts de chamada de ferramentas baseados em XML. Recurso experimental!!!" 
} }, "experimental": { diff --git a/webview-ui/src/i18n/locales/ru/settings.json b/webview-ui/src/i18n/locales/ru/settings.json index 24b09ab6c1b9..bd9b81aad83d 100644 --- a/webview-ui/src/i18n/locales/ru/settings.json +++ b/webview-ui/src/i18n/locales/ru/settings.json @@ -686,6 +686,10 @@ "todoList": { "label": "Включить инструмент списка задач", "description": "При включении Roo может создавать и управлять списками задач для отслеживания прогресса. Это помогает организовать сложные задачи в управляемые шаги." + }, + "toolCall": { + "label": "Включить вызовы инструментов", + "description": "При включении Roo может использовать вызовы инструментов вместо XML-подсказок вызова инструментов. Экспериментальная функция!!!" } }, "experimental": { diff --git a/webview-ui/src/i18n/locales/tr/settings.json b/webview-ui/src/i18n/locales/tr/settings.json index 91e5b3e9d02c..b2a47e1d62fe 100644 --- a/webview-ui/src/i18n/locales/tr/settings.json +++ b/webview-ui/src/i18n/locales/tr/settings.json @@ -686,6 +686,10 @@ "todoList": { "label": "Yapılacaklar listesi aracını etkinleştir", "description": "Etkinleştirildiğinde, Roo görev ilerlemesini takip etmek için yapılacaklar listeleri oluşturabilir ve yönetebilir. Bu, karmaşık görevleri yönetilebilir adımlara organize etmeye yardımcı olur." + }, + "toolCall": { + "label": "Araç çağrılarını etkinleştir", + "description": "Etkinleştirildiğinde, Roo XML tabanlı araç çağrı komutları yerine araç çağrılarını kullanabilir. Deneysel özellik!!!" } }, "experimental": { diff --git a/webview-ui/src/i18n/locales/vi/settings.json b/webview-ui/src/i18n/locales/vi/settings.json index c6fdea7841c4..e18d9293838e 100644 --- a/webview-ui/src/i18n/locales/vi/settings.json +++ b/webview-ui/src/i18n/locales/vi/settings.json @@ -686,6 +686,10 @@ "todoList": { "label": "Bật công cụ danh sách việc cần làm", "description": "Khi được bật, Roo có thể tạo và quản lý danh sách việc cần làm để theo dõi tiến độ công việc. 
Điều này giúp tổ chức các tác vụ phức tạp thành các bước có thể quản lý được." + }, + "toolCall": { + "label": "Bật gọi công cụ", + "description": "Khi được bật, Roo có thể sử dụng gọi công cụ thay vì lời nhắc gọi công cụ dựa trên XML. Tính năng thử nghiệm!!!" } }, "experimental": { diff --git a/webview-ui/src/i18n/locales/zh-CN/settings.json b/webview-ui/src/i18n/locales/zh-CN/settings.json index c8ca284c04b6..be66626556ec 100644 --- a/webview-ui/src/i18n/locales/zh-CN/settings.json +++ b/webview-ui/src/i18n/locales/zh-CN/settings.json @@ -686,6 +686,10 @@ "todoList": { "label": "启用任务清单工具", "description": "启用后,Roo 可以创建和管理任务清单来跟踪任务进度。这有助于将复杂任务组织成可管理的步骤。" + }, + "toolCall": { + "label": "启用工具调用", + "description": "启用后,Roo 可以使用工具调用而不是基于 XML 的工具调用提示。实验性功能!!!" } }, "experimental": { diff --git a/webview-ui/src/i18n/locales/zh-TW/settings.json b/webview-ui/src/i18n/locales/zh-TW/settings.json index 8163cce20fd3..36707defffe0 100644 --- a/webview-ui/src/i18n/locales/zh-TW/settings.json +++ b/webview-ui/src/i18n/locales/zh-TW/settings.json @@ -686,6 +686,10 @@ "todoList": { "label": "啟用待辦事項清單工具", "description": "啟用後,Roo 可以建立和管理待辦事項清單來追蹤任務進度。這有助於將複雜任務組織成可管理的步驟。" + }, + "toolCall": { + "label": "啟用工具呼叫", + "description": "啟用後,Roo 可以使用工具呼叫而不是基於 XML 的工具呼叫提示。實驗性功能!!!" 
} }, "experimental": { From aeac707009823ce9225a6eeb1d0df571709f0f0e Mon Sep 17 00:00:00 2001 From: NaccOll Date: Thu, 11 Sep 2025 14:39:14 +0800 Subject: [PATCH 2/6] feat: Enhance update_todo_list schema description and improve tool name validation --- .../tools/schemas/update-todo-list-schema.ts | 28 +++++++++++++++++-- src/core/task/tool-call-helper.ts | 6 ++-- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/src/core/prompts/tools/schemas/update-todo-list-schema.ts b/src/core/prompts/tools/schemas/update-todo-list-schema.ts index d02598bfdcf0..6ca0d67a05d7 100644 --- a/src/core/prompts/tools/schemas/update-todo-list-schema.ts +++ b/src/core/prompts/tools/schemas/update-todo-list-schema.ts @@ -4,8 +4,32 @@ import { BaseToolSchema } from "./base-tool-schema" export function generateUpdateTodoListSchema(args: ToolArgs): BaseToolSchema { const schema: BaseToolSchema = { name: "update_todo_list", - description: - "Replaces the current TODO list with an updated one. This is used for tracking task progress, allows for updating multiple statuses at once, and supports adding new tasks as they arise.", + description: `**Description:** +Replace the entire TODO list with an updated checklist reflecting the current state. Always provide the full list; the system will overwrite the previous one. This tool is designed for step-by-step task tracking, allowing you to confirm completion of each step before updating, update multiple task statuses at once (e.g., mark one as completed and start the next), and dynamically add new todos discovered during long or complex tasks. + +**Checklist Format:** +- Use a single-level markdown checklist (no nesting or subtasks). +- List todos in the intended execution order. 
+- Status options: + [ ] Task description (pending) + [x] Task description (completed) + [-] Task description (in progress) + +**Status Rules:** +[ ] = pending (not started) +[x] = completed (fully finished, no unresolved issues) +[-] = in_progress (currently being worked on) + +**Core Principles:** +- Before updating, always confirm which todos have been completed since the last update. +- You may update multiple statuses in a single update (e.g., mark the previous as completed and the next as in progress). +- When a new actionable item is discovered during a long or complex task, add it to the todo list immediately. +- Do not remove any unfinished todos unless explicitly instructed. +- Always retain all unfinished tasks, updating their status as needed. +- Only mark a task as completed when it is fully accomplished (no partials, no unresolved dependencies). +- If a task is blocked, keep it as in_progress and add a new todo describing what needs to be resolved. +- Remove tasks only if they are no longer relevant or if the user requests deletion. +`, parameters: [ { name: "todos", diff --git a/src/core/task/tool-call-helper.ts b/src/core/task/tool-call-helper.ts index 7d57820b8d0c..89d067feed80 100644 --- a/src/core/task/tool-call-helper.ts +++ b/src/core/task/tool-call-helper.ts @@ -163,12 +163,12 @@ export class StreamingToolCallProcessor { } // the index of GPT-5 tool_call not start by 0 const toolCall = this.accumulatedToolCalls[index] - const isValidToolName = - toolCall?.function?.name && toolRegistry.isToolSupported(toolCall.function.name as ToolName) + const toolName = toolCall?.function?.name as ToolName + const isValidToolName = toolName && toolRegistry.isToolSupported(toolName) const result: ToolCallParam = { providerType: "openai", - toolName: isValidToolName ? toolCall.function.name : "", + toolName: isValidToolName ? 
toolName : "", toolUserId: toolCall?.id || undefined, chunkContent: xmlOutput, originContent: this.accumulatedToolCalls, From 83c0633cc114baacc4eff678dfee32bb92eed5a2 Mon Sep 17 00:00:00 2001 From: NaccOll Date: Thu, 11 Sep 2025 23:01:07 +0800 Subject: [PATCH 3/6] feat: Update directory path description and handle empty path in listFilesTool and searchFilesTool --- src/core/prompts/tools/schemas/search-files-schema.ts | 2 +- src/core/tools/listFilesTool.ts | 6 +++++- src/core/tools/searchFilesTool.ts | 6 +++++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/core/prompts/tools/schemas/search-files-schema.ts b/src/core/prompts/tools/schemas/search-files-schema.ts index 227fb8c8f296..cb77f8a77516 100644 --- a/src/core/prompts/tools/schemas/search-files-schema.ts +++ b/src/core/prompts/tools/schemas/search-files-schema.ts @@ -9,7 +9,7 @@ export function generateSearchFilesSchema(args: ToolArgs): BaseToolSchema { { name: "path", type: "string", - description: `Directory path to search in (relative to workspace directory ${args.cwd}). This directory will be recursively searched. When searching the entire workspace, the parameter value is '.'`, + description: `The path of the directory to search in (relative to the current workspace directory ${args.cwd}). This directory will be recursively searched.`, required: true, }, { diff --git a/src/core/tools/listFilesTool.ts b/src/core/tools/listFilesTool.ts index e51453c5d9e9..55bed6c94a95 100644 --- a/src/core/tools/listFilesTool.ts +++ b/src/core/tools/listFilesTool.ts @@ -31,10 +31,14 @@ export async function listFilesTool( pushToolResult: PushToolResult, removeClosingTag: RemoveClosingTag, ) { - const relDirPath: string | undefined = block.params.path + let relDirPath: string | undefined = block.params.path const recursiveRaw: string | undefined = block.params.recursive const recursive = recursiveRaw?.toLowerCase() === "true" + if (block.toolUseId && relDirPath === "") { + relDirPath = "." 
+ } + // Calculate if the path is outside workspace const absolutePath = relDirPath ? path.resolve(cline.cwd, relDirPath) : cline.cwd const isOutsideWorkspace = isPathOutsideWorkspace(absolutePath) diff --git a/src/core/tools/searchFilesTool.ts b/src/core/tools/searchFilesTool.ts index b6ee97f87426..45b408747015 100644 --- a/src/core/tools/searchFilesTool.ts +++ b/src/core/tools/searchFilesTool.ts @@ -15,10 +15,14 @@ export async function searchFilesTool( pushToolResult: PushToolResult, removeClosingTag: RemoveClosingTag, ) { - const relDirPath: string | undefined = block.params.path + let relDirPath: string | undefined = block.params.path const regex: string | undefined = block.params.regex const filePattern: string | undefined = block.params.file_pattern + if (block.toolUseId && relDirPath === "") { + relDirPath = "." + } + const absolutePath = relDirPath ? path.resolve(cline.cwd, relDirPath) : cline.cwd const isOutsideWorkspace = isPathOutsideWorkspace(absolutePath) From 44766076fe7e82da5de2caf73355f699d35050a4 Mon Sep 17 00:00:00 2001 From: NaccOll Date: Fri, 12 Sep 2025 16:54:20 +0800 Subject: [PATCH 4/6] feat: remove args param to avoid claude tool call format error --- .../tools/schemas/apply-diff-schema.ts | 94 +++++++++---------- .../prompts/tools/schemas/read-file-schema.ts | 62 ++++++------ src/core/tools/multiApplyDiffTool.ts | 6 +- src/core/tools/readFileTool.ts | 12 ++- 4 files changed, 82 insertions(+), 92 deletions(-) diff --git a/src/core/prompts/tools/schemas/apply-diff-schema.ts b/src/core/prompts/tools/schemas/apply-diff-schema.ts index 96c35a18b618..e4f083ae1e0c 100644 --- a/src/core/prompts/tools/schemas/apply-diff-schema.ts +++ b/src/core/prompts/tools/schemas/apply-diff-schema.ts @@ -158,61 +158,53 @@ When applying the diffs, be extra careful to remember to change any closing brac ALWAYS make as many changes in a single 'apply_diff' request as possible using multiple SEARCH/REPLACE blocks`, parameters: [ { - name: "args", - type: "object", - 
description: "Container for the file modification arguments.", + name: "file", + type: "array", + description: `One or more file change objects.`, required: true, - properties: { - file: { - name: "file", - type: "array", - description: `One or more file change objects.`, - required: true, - items: { - name: "fileItem", - type: "object", - description: "A file modification object containing the path and diff operations.", + items: { + name: "fileItem", + type: "object", + description: "A file modification object containing the path and diff operations.", + required: true, + properties: { + path: { + name: "path", + type: "string", + description: `The path of the file to modify (relative to the current workspace directory ${args.cwd})`, required: true, - properties: { - path: { - name: "path", - type: "string", - description: `The path of the file to modify (relative to the current workspace directory ${args.cwd})`, - required: true, - }, - diff: { - name: "diff", - type: "array", - description: "One or more diff elements containing.", - required: true, - items: { - name: "diffItem", - type: "object", + }, + diff: { + name: "diff", + type: "array", + description: "One or more diff elements containing.", + required: true, + items: { + name: "diffItem", + type: "object", + description: + "A single search-and-replace operation. This object contains the search criteria and the replacement content.", + required: true, + properties: { + search: { + name: "search", + type: "string", description: - "A single search-and-replace operation. This object contains the search criteria and the replacement content.", + "SEARCH BLOCK. MUST exactly match existing content including whitespace and indentation.", + required: true, + }, + replace: { + name: "replace", + type: "string", + description: "REPLACE BLOCK.", required: true, - properties: { - search: { - name: "search", - type: "string", - description: - "SEARCH BLOCK. 
MUST exactly match existing content including whitespace and indentation.", - required: true, - }, - replace: { - name: "replace", - type: "string", - description: "REPLACE BLOCK.", - required: true, - }, - start_line: { - name: "start_line", - type: "number", - description: - "The line number of original content where the search block starts", - required: false, - }, - }, + }, + start_line: { + name: "start_line", + type: "number", + description: + "The line number of original content where the search block starts", + required: false, }, }, }, diff --git a/src/core/prompts/tools/schemas/read-file-schema.ts b/src/core/prompts/tools/schemas/read-file-schema.ts index bf1577756931..d907f8311b1a 100644 --- a/src/core/prompts/tools/schemas/read-file-schema.ts +++ b/src/core/prompts/tools/schemas/read-file-schema.ts @@ -29,44 +29,36 @@ ${ ${isMultipleReadsEnabled ? `- When you need to read more than ${maxConcurrentReads} files, prioritize the most critical files first, then use subsequent read_file requests for additional files` : ""}`, parameters: [ { - name: "args", - type: "object", - description: "Contains one or more file elements, where each file contains.", + name: "file", + type: "array", + description: `An array of file objects to read, with an optional line range. Reading multiple files (within the ${maxConcurrentReads}-file limit).`, required: true, - properties: { - file: { - name: "file", - type: "array", - description: `An array of file objects to read, with an optional line range. 
Reading multiple files (within the ${maxConcurrentReads}-file limit).`, - required: true, - items: { - name: "fileItem", - type: "object", - description: "A file object", + items: { + name: "fileItem", + type: "object", + description: "A file object", + required: true, + properties: { + path: { + name: "path", + type: "string", + description: `File path (relative to workspace directory ${args.cwd}).`, required: true, - properties: { - path: { - name: "path", - type: "string", - description: `File path (relative to workspace directory ${args.cwd}).`, - required: true, - }, - ...(partialReadsEnabled - ? { - line_range: { - name: "line_range", - type: "array", - description: `One or more line range elements in format "start-end" (1-based, inclusive).`, - required: false, - items: { - name: "text", - type: "string", - }, - }, - } - : {}), - }, }, + ...(partialReadsEnabled + ? { + line_range: { + name: "line_range", + type: "array", + description: `One or more line range elements in format "start-end" (1-based, inclusive).`, + required: false, + items: { + name: "text", + type: "string", + }, + }, + } + : {}), }, }, }, diff --git a/src/core/tools/multiApplyDiffTool.ts b/src/core/tools/multiApplyDiffTool.ts index 4aae13473d3d..2d9025ae067b 100644 --- a/src/core/tools/multiApplyDiffTool.ts +++ b/src/core/tools/multiApplyDiffTool.ts @@ -105,11 +105,11 @@ export async function applyDiffTool( return } - if (argsXmlTag) { + if (argsXmlTag || block.toolUseId) { // Parse file entries from XML (new way) try { let files = [] as any[] - if (!block.toolUseId) { + if (argsXmlTag) { // IMPORTANT: We use parseXmlForDiff here instead of parseXml to prevent HTML entity decoding // This ensures exact character matching when comparing parsed content against original file content // Without this, special characters like & would be decoded to & causing diff mismatches @@ -117,7 +117,7 @@ export async function applyDiffTool( files = Array.isArray(parsed.file) ? 
parsed.file : [parsed.file].filter(Boolean) } else { const input = block.toolUseParam?.input as any - const args = input.args + const args = input files = Array.isArray(args?.file) ? args?.file : [args?.file].filter(Boolean) } diff --git a/src/core/tools/readFileTool.ts b/src/core/tools/readFileTool.ts index 01427f4d9dc7..5b70ed7aa5e3 100644 --- a/src/core/tools/readFileTool.ts +++ b/src/core/tools/readFileTool.ts @@ -125,11 +125,17 @@ export async function readFileTool( const fileEntries: FileEntry[] = [] - if (argsXmlTag) { + if (argsXmlTag || block.toolUseId) { // Parse file entries from XML (new multi-file format) try { - const parsed = parseXml(argsXmlTag) as any - const files = Array.isArray(parsed.file) ? parsed.file : [parsed.file].filter(Boolean) + let files: any[] = [] + if (argsXmlTag) { + const parsed = parseXml(argsXmlTag) as any + files = Array.isArray(parsed.file) ? parsed.file : [parsed.file].filter(Boolean) + } else { + const params: any = block.toolUseParam?.input + files = params?.file || [] + } for (const file of files) { if (!file.path) continue // Skip if no path in a file entry From bf255fb09cf46697c10d814e26fc74b7980abea4 Mon Sep 17 00:00:00 2001 From: NaccOll Date: Fri, 12 Sep 2025 16:56:02 +0800 Subject: [PATCH 5/6] feat: update XML parsing logic to handle multi-file format without toolUseId --- src/core/tools/multiApplyDiffTool.ts | 2 +- src/core/tools/readFileTool.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/tools/multiApplyDiffTool.ts b/src/core/tools/multiApplyDiffTool.ts index 2d9025ae067b..7fa567829f70 100644 --- a/src/core/tools/multiApplyDiffTool.ts +++ b/src/core/tools/multiApplyDiffTool.ts @@ -109,7 +109,7 @@ export async function applyDiffTool( // Parse file entries from XML (new way) try { let files = [] as any[] - if (argsXmlTag) { + if (argsXmlTag && !block.toolUseId) { // IMPORTANT: We use parseXmlForDiff here instead of parseXml to prevent HTML entity decoding // This ensures exact 
character matching when comparing parsed content against original file content // Without this, special characters like & would be decoded to & causing diff mismatches diff --git a/src/core/tools/readFileTool.ts b/src/core/tools/readFileTool.ts index 5b70ed7aa5e3..0410dc70c4cb 100644 --- a/src/core/tools/readFileTool.ts +++ b/src/core/tools/readFileTool.ts @@ -129,7 +129,7 @@ export async function readFileTool( // Parse file entries from XML (new multi-file format) try { let files: any[] = [] - if (argsXmlTag) { + if (argsXmlTag && !block.toolUseId) { const parsed = parseXml(argsXmlTag) as any files = Array.isArray(parsed.file) ? parsed.file : [parsed.file].filter(Boolean) } else { From 62c66b1a3bdc04d788b58ab8e846c7ad3289542b Mon Sep 17 00:00:00 2001 From: NaccOll Date: Fri, 12 Sep 2025 18:16:30 +0800 Subject: [PATCH 6/6] feat: enhance documentation for ask_followup and update_todo_list schemas with usage examples and guidelines --- .../schemas/ask-followup-question-schema.ts | 14 +++++- .../tools/schemas/update-todo-list-schema.ts | 48 +++++++++++++++---- 2 files changed, 53 insertions(+), 9 deletions(-) diff --git a/src/core/prompts/tools/schemas/ask-followup-question-schema.ts b/src/core/prompts/tools/schemas/ask-followup-question-schema.ts index 50215fdde844..072499beacb5 100644 --- a/src/core/prompts/tools/schemas/ask-followup-question-schema.ts +++ b/src/core/prompts/tools/schemas/ask-followup-question-schema.ts @@ -15,7 +15,19 @@ export function generateAskFollowupQuestionSchema(args: ToolArgs): BaseToolSchem { name: "follow_up", type: "string", - description: `A list of 2-4 suggested answers, each in its own tag. Suggestions must be complete, actionable answers without placeholders. Optionally include mode attribute to switch modes (code/architect/etc.), such as 'suggestion text'`, + description: `A list of 2-4 suggested answers, each in its own tag. Suggestions must be complete, actionable answers without placeholders. 
Optionally include mode attribute to switch modes (code/architect/etc.) + +Question1: Your question here +Example1: +First suggestion +Action with mode switch + +Question2: What is the path to the frontend-config.json file? +Example2: +./src/frontend-config.json +./config/frontend-config.json +./frontend-config.json +`, required: true, }, ], diff --git a/src/core/prompts/tools/schemas/update-todo-list-schema.ts b/src/core/prompts/tools/schemas/update-todo-list-schema.ts index 6ca0d67a05d7..b942725267b8 100644 --- a/src/core/prompts/tools/schemas/update-todo-list-schema.ts +++ b/src/core/prompts/tools/schemas/update-todo-list-schema.ts @@ -11,14 +11,14 @@ Replace the entire TODO list with an updated checklist reflecting the current st - Use a single-level markdown checklist (no nesting or subtasks). - List todos in the intended execution order. - Status options: - [ ] Task description (pending) - [x] Task description (completed) - [-] Task description (in progress) + - [ ] Task description (pending) + - [x] Task description (completed) + - [-] Task description (in progress) **Status Rules:** -[ ] = pending (not started) -[x] = completed (fully finished, no unresolved issues) -[-] = in_progress (currently being worked on) +- [ ] = pending (not started) +- [x] = completed (fully finished, no unresolved issues) +- [-] = in_progress (currently being worked on) **Core Principles:** - Before updating, always confirm which todos have been completed since the last update. @@ -29,13 +29,45 @@ Replace the entire TODO list with an updated checklist reflecting the current st - Only mark a task as completed when it is fully accomplished (no partials, no unresolved dependencies). - If a task is blocked, keep it as in_progress and add a new todo describing what needs to be resolved. - Remove tasks only if they are no longer relevant or if the user requests deletion. + +**When to Use:** +- The task is complicated or involves multiple steps or requires ongoing tracking. 
+- You need to update the status of several todos at once. +- New actionable items are discovered during task execution. +- The user requests a todo list or provides multiple tasks. +- The task is complex and benefits from clear, stepwise progress tracking. + +**When NOT to Use:** +- There is only a single, trivial task. +- The task can be completed in one or two simple steps. +- The request is purely conversational or informational. + +**Task Management Guidelines:** +- Mark task as completed immediately after all work of the current task is done. +- Start the next task by marking it as in_progress. +- Add new todos as soon as they are identified. +- Use clear, descriptive task names. `, parameters: [ { name: "todos", type: "string", - description: - "The complete TODO list in Markdown checklist format. Use '[ ]' for pending, '[x]' for completed, and '[-]' for in-progress tasks.", + description: `The complete TODO list without prefix '-'. + +**Usage Example:** +[x] Analyze requirements +[x] Design architecture +[-] Implement core logic +[ ] Write tests +[ ] Update documentation + +*After completing "Implement core logic" and starting "Write tests":* +[x] Analyze requirements +[x] Design architecture +[x] Implement core logic +[-] Write tests +[ ] Update documentation +[ ] Add performance benchmarks `, required: true, }, ],