fix: defer new_task tool_result until subtask completes for native protocol

daniel-lxs · daniel-lxs · commit ce71beff0b13 · 2025-11-26T16:39:12.000-05:00
For native tools, the API requires strict message ordering (User &gt; Assistant &gt; Tool Call &gt; Tool Result).
The new_task tool was causing 400 errors because:
1. NewTaskTool pushed a tool_result immediately
2. Parent task paused for subtask
3. completeSubtask added a separate user message
4. This resulted in consecutive user messages which broke the ordering

This fix defers the tool_result until the subtask completes:
- NewTaskTool stores pendingNewTaskToolCallId but doesn't push tool_result (native only)
- Task loop stays alive via isPaused condition in stack push
- completeSubtask pushes the actual tool_result with subtask's real result
- After waitForSubtask, content is copied to currentUserContent

Benefits:
- No more 400 errors with native tools
- Parent task now sees what the subtask actually accomplished
- XML protocol behavior is unchanged
diff --git a/src/core/assistant-message/presentAssistantMessage.ts b/src/core/assistant-message/presentAssistantMessage.ts
@@ -938,6 +938,7 @@ export async function presentAssistantMessage(cline: Task) {
 						pushToolResult,
 						removeClosingTag,
 						toolProtocol,
+						toolCallId: block.id,
 					})
 					break
 				case "attempt_completion": {
diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts
@@ -157,6 +157,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 	readonly rootTaskId?: string
 	readonly parentTaskId?: string
 	childTaskId?: string
+	pendingNewTaskToolCallId?: string
 
 	readonly instanceId: string
 	readonly metadata: TaskMetadata
@@ -1967,10 +1968,29 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 		try {
 			await this.say("subtask_result", lastMessage)
 
-			await this.addToApiConversationHistory({
-				role: "user",
-				content: [{ type: "text", text: `[new_task completed] Result: ${lastMessage}` }],
-			})
+			// Check if using native protocol to determine how to add the subtask result
+			const modelInfo = this.api.getModel().info
+			const toolProtocol = resolveToolProtocol(this.apiConfiguration, modelInfo)
+
+			if (toolProtocol === "native" && this.pendingNewTaskToolCallId) {
+				// For native protocol, push the actual tool_result with the subtask's real result.
+				// NewTaskTool deferred pushing the tool_result until now so that the parent task
+				// gets useful information about what the subtask actually accomplished.
+				this.userMessageContent.push({
+					type: "tool_result",
+					tool_use_id: this.pendingNewTaskToolCallId,
+					content: `[new_task completed] Result: ${lastMessage}`,
+				} as Anthropic.ToolResultBlockParam)
+
+				// Clear the pending tool call ID
+				this.pendingNewTaskToolCallId = undefined
+			} else {
+				// For XML protocol (or if no pending tool call ID), add as a separate user message
+				await this.addToApiConversationHistory({
+					role: "user",
+					content: [{ type: "text", text: `[new_task completed] Result: ${lastMessage}` }],
+				})
+			}
 		} catch (error) {
 			this.providerRef
 				.deref()
@@ -2073,6 +2093,14 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 				provider.log(`[subtasks] paused ${this.taskId}.${this.instanceId}`)
 				await this.waitForSubtask()
 				provider.log(`[subtasks] resumed ${this.taskId}.${this.instanceId}`)
+
+				// After subtask completes, completeSubtask has pushed content to userMessageContent.
+				// Copy it to currentUserContent so it gets sent to the API in this iteration.
+				if (this.userMessageContent.length > 0) {
+					currentUserContent.push(...this.userMessageContent)
+					this.userMessageContent = []
+				}
+
 				const currentMode = (await provider.getState())?.mode ?? defaultModeSlug
 
 				if (currentMode !== this.pausedModeSlug) {
@@ -2992,7 +3020,9 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 						this.consecutiveMistakeCount++
 					}
 
-					if (this.userMessageContent.length > 0) {
+					// Push to stack if there's content OR if we're paused waiting for a subtask.
+					// When paused, we push an empty item so the loop continues to the pause check.
+					if (this.userMessageContent.length > 0 || this.isPaused) {
 						stack.push({
 							userContent: [...this.userMessageContent], // Create a copy to avoid mutation issues
 							includeFileDetails: false, // Subsequent iterations don't need file details
diff --git a/src/core/task/__tests__/Task.spec.ts b/src/core/task/__tests__/Task.spec.ts
@@ -1966,4 +1966,153 @@ describe("Queued message processing after condense", () => {
 		expect(spyB).toHaveBeenCalledWith("B message", undefined)
 		expect(taskB.messageQueueService.isEmpty()).toBe(true)
 	})
+
+	describe("completeSubtask native protocol handling", () => {
+		let mockProvider: any
+		let mockApiConfig: any
+
+		beforeEach(() => {
+			vi.clearAllMocks()
+
+			if (!TelemetryService.hasInstance()) {
+				TelemetryService.createInstance([])
+			}
+
+			mockApiConfig = {
+				apiProvider: "anthropic",
+				apiKey: "test-key",
+			}
+
+			mockProvider = {
+				context: {
+					globalStorageUri: { fsPath: "/test/storage" },
+				},
+				getState: vi.fn().mockResolvedValue({
+					apiConfiguration: mockApiConfig,
+				}),
+				say: vi.fn(),
+				postStateToWebview: vi.fn().mockResolvedValue(undefined),
+				postMessageToWebview: vi.fn().mockResolvedValue(undefined),
+				updateTaskHistory: vi.fn().mockResolvedValue(undefined),
+				log: vi.fn(),
+			}
+		})
+
+		it("should push tool_result to userMessageContent for native protocol with pending tool call ID", async () => {
+			// Create a task with a model that supports native tools
+			const task = new Task({
+				provider: mockProvider,
+				apiConfiguration: {
+					...mockApiConfig,
+					apiProvider: "anthropic",
+					toolProtocol: "native", // Explicitly set native protocol
+				},
+				task: "parent task",
+				startTask: false,
+			})
+
+			// Mock the API to return a native protocol model
+			vi.spyOn(task.api, "getModel").mockReturnValue({
+				id: "claude-3-5-sonnet-20241022",
+				info: {
+					contextWindow: 200000,
+					maxTokens: 8192,
+					supportsPromptCache: true,
+					supportsNativeTools: true,
+					defaultToolProtocol: "native",
+				} as ModelInfo,
+			})
+
+			// For native protocol, NewTaskTool does NOT push tool_result immediately.
+			// It only sets the pending tool call ID. The actual tool_result is pushed by completeSubtask.
+			task.pendingNewTaskToolCallId = "test-tool-call-id"
+
+			// Call completeSubtask
+			await task.completeSubtask("Subtask completed successfully")
+
+			// For native protocol, should push the actual tool_result with the subtask's result
+			expect(task.userMessageContent).toHaveLength(1)
+			expect(task.userMessageContent[0]).toEqual({
+				type: "tool_result",
+				tool_use_id: "test-tool-call-id",
+				content: "[new_task completed] Result: Subtask completed successfully",
+			})
+
+			// Should NOT have added a user message to apiConversationHistory
+			expect(task.apiConversationHistory).toHaveLength(0)
+
+			// pending tool call ID should be cleared
+			expect(task.pendingNewTaskToolCallId).toBeUndefined()
+		})
+
+		it("should add user message to apiConversationHistory for XML protocol", async () => {
+			// Create a task with a model that doesn't support native tools
+			const task = new Task({
+				provider: mockProvider,
+				apiConfiguration: {
+					...mockApiConfig,
+					apiProvider: "anthropic",
+				},
+				task: "parent task",
+				startTask: false,
+			})
+
+			// Mock the API to return an XML protocol model (no native tool support)
+			vi.spyOn(task.api, "getModel").mockReturnValue({
+				id: "claude-2",
+				info: {
+					contextWindow: 100000,
+					maxTokens: 4096,
+					supportsPromptCache: false,
+					supportsNativeTools: false,
+				} as ModelInfo,
+			})
+
+			// Call completeSubtask
+			await task.completeSubtask("Subtask completed successfully")
+
+			// For XML protocol, should add to apiConversationHistory
+			expect(task.apiConversationHistory).toHaveLength(1)
+			expect(task.apiConversationHistory[0]).toEqual(
+				expect.objectContaining({
+					role: "user",
+					content: [{ type: "text", text: "[new_task completed] Result: Subtask completed successfully" }],
+				}),
+			)
+
+			// Should NOT have added to userMessageContent
+			expect(task.userMessageContent).toHaveLength(0)
+		})
+
+		it("should set isPaused to false after completeSubtask", async () => {
+			const task = new Task({
+				provider: mockProvider,
+				apiConfiguration: mockApiConfig,
+				task: "parent task",
+				startTask: false,
+			})
+
+			// Mock the API to return an XML protocol model
+			vi.spyOn(task.api, "getModel").mockReturnValue({
+				id: "claude-2",
+				info: {
+					contextWindow: 100000,
+					maxTokens: 4096,
+					supportsPromptCache: false,
+					supportsNativeTools: false,
+				} as ModelInfo,
+			})
+
+			// Set isPaused to true (simulating waiting for subtask)
+			task.isPaused = true
+			task.childTaskId = "child-task-id"
+
+			// Call completeSubtask
+			await task.completeSubtask("Subtask completed")
+
+			// Should reset paused state
+			expect(task.isPaused).toBe(false)
+			expect(task.childTaskId).toBeUndefined()
+		})
+	})
 })
diff --git a/src/core/tools/BaseTool.ts b/src/core/tools/BaseTool.ts
@@ -18,6 +18,7 @@ export interface ToolCallbacks {
 	pushToolResult: PushToolResult
 	removeClosingTag: RemoveClosingTag
 	toolProtocol: ToolProtocol
+	toolCallId?: string
 }
 
 /**
diff --git a/src/core/tools/NewTaskTool.ts b/src/core/tools/NewTaskTool.ts
@@ -30,7 +30,7 @@ export class NewTaskTool extends BaseTool<"new_task"> {
 
 	async execute(params: NewTaskParams, task: Task, callbacks: ToolCallbacks): Promise<void> {
 		const { mode, message, todos } = params
-		const { askApproval, handleError, pushToolResult, toolProtocol } = callbacks
+		const { askApproval, handleError, pushToolResult, toolProtocol, toolCallId } = callbacks
 
 		try {
 			// Validate required parameters.
@@ -133,9 +133,20 @@ export class NewTaskTool extends BaseTool<"new_task"> {
 				return
 			}
 
-			pushToolResult(
-				`Successfully created new task in ${targetMode.name} mode with message: ${unescapedMessage} and ${todoItems.length} todo items`,
-			)
+			// For native protocol, defer the tool_result until the subtask completes.
+			// The actual result (including what the subtask accomplished) will be pushed
+			// by completeSubtask. This gives the parent task useful information about
+			// what the subtask actually did.
+			if (toolProtocol === "native" && toolCallId) {
+				task.pendingNewTaskToolCallId = toolCallId
+				// Don't push tool_result here - it will come from completeSubtask with the actual result.
+				// The task loop will stay alive because isPaused is true (see Task.ts stack push condition).
+			} else {
+				// For XML protocol, push the result immediately (existing behavior)
+				pushToolResult(
+					`Successfully created new task in ${targetMode.name} mode with message: ${unescapedMessage} and ${todoItems.length} todo items`,
+				)
+			}
 
 			return
 		} catch (error) {

Original file line number	Diff line number	Diff line change
`@@ -18,6 +18,7 @@ export interface ToolCallbacks {`
`18`	`18`	`pushToolResult: PushToolResult`
`19`	`19`	`removeClosingTag: RemoveClosingTag`
`20`	`20`	`toolProtocol: ToolProtocol`
	`21`	`+ toolCallId?: string`
`21`	`22`	`}`
`22`	`23`
`23`	`24`	`/**`