From ebcaad82490ea7d583b8d91b57beb5a9dd3b6d29 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Wed, 12 Nov 2025 11:33:44 -0700 Subject: [PATCH 1/6] Migrate conversation continuity to plugin-side encrypted reasoning items (Responses API) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary We moved continuity off OpenAI servers and now maintain conversation state locally by persisting and replaying encrypted reasoning items. Requests are stateless (store=false) while retaining the performance/caching benefits of the Responses API. Why This aligns with how Roo manages context and simplifies our Responses API implementation while keeping all the benefits of continuity, caching, and latency improvements. What changed - All OpenAI models now use the Responses API; system instructions are passed via the top-level instructions field; requests include store=false and include=["reasoning.encrypted_content"]. - We persist encrypted reasoning items (type: "reasoning", encrypted_content, optional id) into API history and replay them on subsequent turns. - Reasoning summaries default to summary: "auto" when supported; text.verbosity only when supported. - Atomic persistence via safeWriteJson. Removed - previous_response_id flows, suppressPreviousResponseId/skipPrevResponseIdOnce, persistGpt5Metadata(), and GPT‑5 response ID metadata in UI messages. Kept - taskId and mode metadata for cross-provider features. Result - ZDR-friendly, stateless continuity with equal or better performance and a simpler codepath. --- packages/types/src/message.ts | 9 - src/api/index.ts | 23 +- .../providers/__tests__/openai-native.spec.ts | 114 ++---- src/api/providers/openai-native.ts | 334 ++++-------------- .../__tests__/taskMessages.spec.ts | 3 - src/core/task-persistence/apiMessages.ts | 10 +- src/core/task/Task.ts | 178 +++------- src/core/task/__tests__/Task.spec.ts | 12 +- src/core/task/types.ts | 15 +- 9 files changed, 166 insertions(+), 532 deletions(-) diff --git a/packages/types/src/message.ts b/packages/types/src/message.ts index 89374222d6..09737f9ea6 100644 --- a/packages/types/src/message.ts +++ b/packages/types/src/message.ts @@ -226,15 +226,6 @@ export const clineMessageSchema = z.object({ isProtected: z.boolean().optional(), apiProtocol: z.union([z.literal("openai"), z.literal("anthropic")]).optional(), isAnswered: z.boolean().optional(), - metadata: z - .object({ - gpt5: z - .object({ - previous_response_id: z.string().optional(), - }) - .optional(), - }) - .optional(), }) export type ClineMessage = z.infer diff --git a/src/api/index.ts b/src/api/index.ts index 351f4ef1be..ae8be51349 100644 --- a/src/api/index.ts +++ b/src/api/index.ts @@ -49,23 +49,20 @@ export interface SingleCompletionHandler { } export interface ApiHandlerCreateMessageMetadata { - mode?: string - taskId: string - previousResponseId?: string /** - * When true, the provider must NOT fall back to internal continuity state - * (e.g., lastResponseId) if previousResponseId is absent. - * Used to enforce "skip once" after a condense operation. + * Task ID used for tracking and provider-specific features: + * - DeepInfra: Used as prompt_cache_key for caching + * - Roo: Sent as X-Roo-Task-ID header + * - Requesty: Sent as trace_id + * - Unbound: Sent in unbound_metadata */ - suppressPreviousResponseId?: boolean + taskId: string /** - * Controls whether the response should be stored for 30 days in OpenAI's Responses API. 
- * When true (default), responses are stored and can be referenced in future requests - * using the previous_response_id for efficient conversation continuity. - * Set to false to opt out of response storage for privacy or compliance reasons. - * @default true + * Current mode slug for provider-specific tracking: + * - Requesty: Sent in extra metadata + * - Unbound: Sent in unbound_metadata */ - store?: boolean + mode?: string } export interface ApiHandler { diff --git a/src/api/providers/__tests__/openai-native.spec.ts b/src/api/providers/__tests__/openai-native.spec.ts index 618cdeac65..b907dd1a91 100644 --- a/src/api/providers/__tests__/openai-native.spec.ts +++ b/src/api/providers/__tests__/openai-native.spec.ts @@ -686,69 +686,6 @@ describe("OpenAiNativeHandler", () => { expect(contentChunks).toHaveLength(0) }) - it("should support previous_response_id for conversation continuity", async () => { - // Mock fetch for Responses API - const mockFetch = vitest.fn().mockResolvedValue({ - ok: true, - body: new ReadableStream({ - start(controller) { - // Include response ID in the response - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.created","response":{"id":"resp_123","status":"in_progress"}}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Response with ID"}}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.done","response":{"id":"resp_123","usage":{"prompt_tokens":10,"completion_tokens":3}}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - - // Mock SDK to fail - mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-5-2025-08-07", - }) - - // First request - should not have previous_response_id - const stream1 = handler.createMessage(systemPrompt, messages) - const chunks1: any[] = [] - for await (const chunk of stream1) { - chunks1.push(chunk) - } - - // Verify first request doesn't include previous_response_id - let firstCallBody = JSON.parse(mockFetch.mock.calls[0][1].body) - expect(firstCallBody.previous_response_id).toBeUndefined() - - // Second request with metadata - should include previous_response_id - const stream2 = handler.createMessage(systemPrompt, messages, { - taskId: "test-task", - previousResponseId: "resp_456", - }) - const chunks2: any[] = [] - for await (const chunk of stream2) { - chunks2.push(chunk) - } - - // Verify second request includes the provided previous_response_id - let secondCallBody = JSON.parse(mockFetch.mock.calls[1][1].body) - expect(secondCallBody.previous_response_id).toBe("resp_456") - }) - it("should handle unhandled stream events gracefully", async () => { // Mock fetch for the fallback SSE path const mockFetch = vitest.fn().mockResolvedValue({ @@ -798,7 +735,7 @@ describe("OpenAiNativeHandler", () => { expect(textChunks[0].text).toBe("Hello") }) - it("should use stored response ID when metadata doesn't provide one", async () => { + it.skip("should use stored response ID when metadata doesn't provide one - DEPRECATED", async () => { // Mock fetch for Responses API const mockFetch = vitest .fn() @@ -854,12 +791,10 @@ describe("OpenAiNativeHandler", () => { // consume stream } - // Verify second request uses the stored response ID from first request - let secondCallBody = 
JSON.parse(mockFetch.mock.calls[1][1].body) - expect(secondCallBody.previous_response_id).toBe("resp_789") + // DEPRECATED: This test is for old previous_response_id behavior }) - it("should retry with full conversation when previous_response_id fails", async () => { + it.skip("should retry with full conversation when previous_response_id fails - DEPRECATED", async () => { // This test verifies the fix for context loss bug when previous_response_id becomes invalid const mockFetch = vitest .fn() @@ -908,10 +843,9 @@ describe("OpenAiNativeHandler", () => { { role: "user", content: "And 4+4?" }, // Latest message ] - // Call with a previous_response_id that will fail + // Call without previous_response_id const stream = handler.createMessage(systemPrompt, conversationMessages, { taskId: "test-task", - previousResponseId: "resp_invalid", }) const chunks: any[] = [] @@ -966,7 +900,7 @@ describe("OpenAiNativeHandler", () => { ]) }) - it("should retry with full conversation when SDK returns 400 for invalid previous_response_id", async () => { + it.skip("should retry with full conversation when SDK returns 400 for invalid previous_response_id - DEPRECATED", async () => { // Test the SDK path (executeRequest method) for handling invalid previous_response_id // Mock SDK to return an async iterable that we can control @@ -1010,10 +944,9 @@ describe("OpenAiNativeHandler", () => { { role: "user", content: "What number did I ask you to remember?" }, ] - // Call with a previous_response_id that will fail + // Call without previous_response_id const stream = handler.createMessage(systemPrompt, conversationMessages, { taskId: "test-task", - previousResponseId: "resp_invalid", }) const chunks: any[] = [] @@ -1061,7 +994,7 @@ describe("OpenAiNativeHandler", () => { ]) }) - it("should only send latest message when using previous_response_id", async () => { + it.skip("should only send latest message when using previous_response_id - DEPRECATED", async () => { // Mock fetch for Responses API const mockFetch = vitest .fn() @@ -1152,7 +1085,6 @@ describe("OpenAiNativeHandler", () => { const stream2 = handler.createMessage(systemPrompt, secondMessages, { taskId: "test-task", - previousResponseId: "resp_001", }) for await (const chunk of stream2) { // consume stream @@ -1169,26 +1101,44 @@ describe("OpenAiNativeHandler", () => { expect(secondCallBody.previous_response_id).toBe("resp_001") }) - it("should correctly prepare structured input", () => { + it("should format full conversation correctly", async () => { + const mockFetch = vitest.fn().mockResolvedValue({ + ok: true, + body: new ReadableStream({ + start(controller) { + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Response"}}\n\n', + ), + ) + controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) + controller.close() + }, + }), + }) + global.fetch = mockFetch as any + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + const gpt5Handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", }) - // Test with metadata that has previousResponseId - // @ts-expect-error - private method - const { formattedInput, previousResponseId } = gpt5Handler.prepareStructuredInput(systemPrompt, messages, { + const stream = gpt5Handler.createMessage(systemPrompt, messages, { taskId: "task1", - previousResponseId: "resp_123", }) + for await (const chunk of stream) { + // consume + } - expect(previousResponseId).toBe("resp_123") - 
expect(formattedInput).toEqual([ + const callBody = JSON.parse(mockFetch.mock.calls[0][1].body) + expect(callBody.input).toEqual([ { role: "user", content: [{ type: "input_text", text: "Hello!" }], }, ]) + expect(callBody.previous_response_id).toBeUndefined() }) it("should provide helpful error messages for different error codes", async () => { diff --git a/src/api/providers/openai-native.ts b/src/api/providers/openai-native.ts index daf6278822..de974c6f62 100644 --- a/src/api/providers/openai-native.ts +++ b/src/api/providers/openai-native.ts @@ -34,11 +34,10 @@ const GPT5_MODEL_PREFIX = "gpt-5" export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: OpenAI - private lastResponseId: string | undefined - private responseIdPromise: Promise | undefined - private responseIdResolver: ((value: string | undefined) => void) | undefined // Resolved service tier from Responses API (actual tier used by OpenAI) private lastServiceTier: ServiceTier | undefined + // Complete response output array (includes reasoning items with encrypted_content) + private lastResponseOutput: any[] | undefined // Event types handled by the shared event processor to avoid duplication private readonly coreHandledEventTypes = new Set([ @@ -126,17 +125,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio return out } - private resolveResponseId(responseId: string | undefined): void { - if (responseId) { - this.lastResponseId = responseId - } - // Resolve the promise so the next request can use this ID - if (this.responseIdResolver) { - this.responseIdResolver(responseId) - this.responseIdResolver = undefined - } - } - override async *createMessage( systemPrompt: string, messages: Anthropic.Messages.MessageParam[], @@ -156,6 +144,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio ): ApiStream { // Reset resolved tier for this request; will be set from response if present this.lastServiceTier = undefined + // Reset output array to capture current response output items + this.lastResponseOutput = undefined // Use Responses API for ALL models const { verbosity, reasoning } = this.getModel() @@ -163,60 +153,22 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio // Resolve reasoning effort for models that support it const reasoningEffort = this.getReasoningEffort(model) - // Wait for any pending response ID from a previous request to be available - // This handles the race condition with fast nano model responses - let effectivePreviousResponseId = metadata?.previousResponseId - - // Check if we should suppress previous response ID (e.g., after condense or message edit) - if (metadata?.suppressPreviousResponseId) { - // Clear the stored lastResponseId to prevent it from being used in future requests - this.lastResponseId = undefined - effectivePreviousResponseId = undefined - } else { - // Only try to get fallback response IDs if not suppressing - - // If we have a pending response ID promise, wait for it to resolve - if (!effectivePreviousResponseId && this.responseIdPromise) { - try { - const resolvedId = await Promise.race([ - this.responseIdPromise, - // Timeout after 100ms to avoid blocking too long - new Promise((resolve) => setTimeout(() => resolve(undefined), 100)), - ]) - if (resolvedId) { - effectivePreviousResponseId = resolvedId - } - } catch { - // Non-fatal if promise fails - } - } - - // Fall back to the last known response ID if 
still not available - if (!effectivePreviousResponseId && this.lastResponseId) { - effectivePreviousResponseId = this.lastResponseId - } - } - - // Format input and capture continuity id - const { formattedInput, previousResponseId } = this.prepareStructuredInput(systemPrompt, messages, metadata) - const requestPreviousResponseId = effectivePreviousResponseId || previousResponseId - - // Create a new promise for this request's response ID - this.responseIdPromise = new Promise((resolve) => { - this.responseIdResolver = resolve - }) + // Format full conversation (messages already include reasoning items from API history) + const formattedInput = this.formatFullConversation(systemPrompt, messages) // Build request body const requestBody = this.buildRequestBody( model, formattedInput, - requestPreviousResponseId, systemPrompt, verbosity, reasoningEffort, metadata, ) + // Temporary debug logging + console.log("[OpenAI Native] Request body:", requestBody) + // Make the request (pass systemPrompt and messages for potential retry) yield* this.executeRequest(requestBody, model, metadata, systemPrompt, messages) } @@ -224,27 +176,26 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio private buildRequestBody( model: OpenAiNativeModel, formattedInput: any, - requestPreviousResponseId: string | undefined, systemPrompt: string, verbosity: any, reasoningEffort: ReasoningEffortWithMinimal | undefined, metadata?: ApiHandlerCreateMessageMetadata, ): any { - // Build a request body (also used for fallback) + // Build a request body // Ensure we explicitly pass max_output_tokens for GPT‑5 based on Roo's reserved model response calculation // so requests do not default to very large limits (e.g., 120k). interface Gpt5RequestBody { model: string - input: Array<{ role: "user" | "assistant"; content: any[] }> + input: Array<{ role: "user" | "assistant"; content: any[] } | { type: string; content: string }> stream: boolean - reasoning?: { effort: ReasoningEffortWithMinimal; summary?: "auto" } + reasoning?: { effort?: ReasoningEffortWithMinimal; summary?: "auto" } text?: { verbosity: VerbosityLevel } temperature?: number max_output_tokens?: number - previous_response_id?: string store?: boolean instructions?: string service_tier?: ServiceTier + include?: string[] } // Validate requested tier against model support; if not supported, omit. @@ -255,17 +206,21 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio model: model.id, input: formattedInput, stream: true, - store: metadata?.store !== false, // Default to true unless explicitly set to false + // Always use stateless operation with encrypted reasoning + store: false, // Always include instructions (system prompt) for Responses API. // Unlike Chat Completions, system/developer roles in input have no special semantics here. // The official way to set system behavior is the top-level `instructions` field. instructions: systemPrompt, - ...(reasoningEffort && { - reasoning: { - effort: reasoningEffort, - ...(this.options.enableGpt5ReasoningSummary ? { summary: "auto" as const } : {}), - }, - }), + include: ["reasoning.encrypted_content"], + ...(reasoningEffort + ? { + reasoning: { + ...(reasoningEffort ? { effort: reasoningEffort } : {}), + ...(this.options.enableGpt5ReasoningSummary ? 
{ summary: "auto" as const } : {}), + }, + } + : {}), // Only include temperature if the model supports it ...(model.info.supportsTemperature !== false && { temperature: @@ -277,7 +232,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio // Explicitly include the calculated max output tokens. // Use the per-request reserved output computed by Roo (params.maxTokens from getModelParams). ...(model.maxTokens ? { max_output_tokens: model.maxTokens } : {}), - ...(requestPreviousResponseId && { previous_response_id: requestPreviousResponseId }), // Include tier when selected and supported by the model, or when explicitly "default" ...(requestedTier && (requestedTier === "default" || allowedTierNames.has(requestedTier)) && { @@ -316,60 +270,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } } } catch (sdkErr: any) { - // Check if this is a 400 error about previous_response_id not found - const errorMessage = sdkErr?.message || sdkErr?.error?.message || "" - const is400Error = sdkErr?.status === 400 || sdkErr?.response?.status === 400 - const isPreviousResponseError = - errorMessage.includes("Previous response") || errorMessage.includes("not found") - - if (is400Error && requestBody.previous_response_id && isPreviousResponseError) { - // Log the error and retry without the previous_response_id - - // Clear the stored lastResponseId to prevent using it again - this.lastResponseId = undefined - - // Re-prepare the full conversation without previous_response_id - let retryRequestBody = { ...requestBody } - delete retryRequestBody.previous_response_id - - // If we have the original messages, re-prepare the full conversation - if (systemPrompt && messages) { - const { formattedInput } = this.prepareStructuredInput(systemPrompt, messages, undefined) - retryRequestBody.input = formattedInput - } - - try { - // Retry with the SDK - const retryStream = (await (this.client as any).responses.create( - retryRequestBody, - )) as AsyncIterable - - if (typeof (retryStream as any)[Symbol.asyncIterator] !== "function") { - // If SDK fails, fall back to SSE - yield* this.makeGpt5ResponsesAPIRequest( - retryRequestBody, - model, - metadata, - systemPrompt, - messages, - ) - return - } - - for await (const event of retryStream) { - for await (const outChunk of this.processEvent(event, model)) { - yield outChunk - } - } - return - } catch (retryErr) { - // If retry also fails, fall back to SSE - yield* this.makeGpt5ResponsesAPIRequest(retryRequestBody, model, metadata, systemPrompt, messages) - return - } - } - - // For other errors, fallback to manual SSE via fetch + // For errors, fallback to manual SSE via fetch yield* this.makeGpt5ResponsesAPIRequest(requestBody, model, metadata, systemPrompt, messages) } } @@ -377,6 +278,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio private formatFullConversation(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): any { // Format the entire conversation history for the Responses API using structured format // This supports both text and images + // Messages already include reasoning items from API history, so we just need to format them const formattedMessages: any[] = [] // Do NOT embed the system prompt as a developer message in the Responses API input. 
@@ -384,6 +286,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio // Process each message for (const message of messages) { + // Check if this is a reasoning item (already formatted in API history) + if ((message as any).type === "reasoning") { + // Pass through reasoning items as-is + formattedMessages.push(message) + continue + } + const role = message.role === "user" ? "user" : "assistant" const content: any[] = [] @@ -421,40 +330,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio return formattedMessages } - private formatSingleStructuredMessage(message: Anthropic.Messages.MessageParam): any { - // Format a single message for the Responses API when using previous_response_id - // When using previous_response_id, we only send the latest user message - const role = message.role === "user" ? "user" : "assistant" - - if (typeof message.content === "string") { - // For simple string content, return structured format with proper type - return { - role, - content: [{ type: "input_text", text: message.content }], - } - } else if (Array.isArray(message.content)) { - // Extract text and image content from blocks - const content: any[] = [] - - for (const block of message.content) { - if (block.type === "text") { - // User messages use input_text - content.push({ type: "input_text", text: (block as any).text }) - } else if (block.type === "image") { - const image = block as Anthropic.Messages.ImageBlockParam - const imageUrl = `data:${image.source.media_type};base64,${image.source.data}` - content.push({ type: "input_image", image_url: imageUrl }) - } - } - - if (content.length > 0) { - return { role, content } - } - } - - return null - } - private async *makeGpt5ResponsesAPIRequest( requestBody: any, model: OpenAiNativeModel, @@ -498,53 +373,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio errorDetails = errorText } - // Check if this is a 400 error about previous_response_id not found - const isPreviousResponseError = - errorDetails.includes("Previous response") || errorDetails.includes("not found") - - if (response.status === 400 && requestBody.previous_response_id && isPreviousResponseError) { - // Log the error and retry without the previous_response_id - - // Clear the stored lastResponseId to prevent using it again - this.lastResponseId = undefined - // Resolve the promise once to unblock any waiting requests - this.resolveResponseId(undefined) - - // Re-prepare the full conversation without previous_response_id - let retryRequestBody = { ...requestBody } - delete retryRequestBody.previous_response_id - - // If we have the original messages, re-prepare the full conversation - if (systemPrompt && messages) { - const { formattedInput } = this.prepareStructuredInput(systemPrompt, messages, undefined) - retryRequestBody.input = formattedInput - } - - // Retry the request with full conversation context - const retryResponse = await fetch(url, { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${apiKey}`, - Accept: "text/event-stream", - }, - body: JSON.stringify(retryRequestBody), - }) - - if (!retryResponse.ok) { - // If retry also fails, throw the original error - throw new Error(`Responses API retry failed (${retryResponse.status})`) - } - - if (!retryResponse.body) { - throw new Error("Responses API error: No response body from retry request") - } - - // Handle the successful retry response - yield* 
this.handleStreamResponse(retryResponse.body, model) - return - } - // Provide user-friendly error messages based on status code switch (response.status) { case 400: @@ -600,47 +428,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } } - /** - * Prepares the input and conversation continuity parameters for a Responses API call. - * Decides whether to send full conversation or just the latest message based on previousResponseId. - * - * - If a `previousResponseId` is available (either from metadata or the handler's state), - * it formats only the most recent user message for the input and returns the response ID - * to maintain conversation context. - * - Otherwise, it formats the entire conversation history (system prompt + messages) for the input. - * - * @returns An object containing the formatted input and the previous response ID (if used). - */ - private prepareStructuredInput( - systemPrompt: string, - messages: Anthropic.Messages.MessageParam[], - metadata?: ApiHandlerCreateMessageMetadata, - ): { formattedInput: any; previousResponseId?: string } { - // Note: suppressPreviousResponseId is handled in handleResponsesApiMessage - // This method now only handles formatting based on whether we have a previous response ID - - // Check for previous response ID from metadata or fallback to lastResponseId - const isFirstMessage = messages.length === 1 && messages[0].role === "user" - const previousResponseId = metadata?.previousResponseId ?? (!isFirstMessage ? this.lastResponseId : undefined) - - if (previousResponseId) { - // When using previous_response_id, only send the latest user message - const lastUserMessage = [...messages].reverse().find((msg) => msg.role === "user") - if (lastUserMessage) { - const formattedMessage = this.formatSingleStructuredMessage(lastUserMessage) - // formatSingleStructuredMessage now always returns an object with role and content - if (formattedMessage) { - return { formattedInput: [formattedMessage], previousResponseId } - } - } - return { formattedInput: [], previousResponseId } - } else { - // Format full conversation history (returns an array of structured messages) - const formattedInput = this.formatFullConversation(systemPrompt, messages) - return { formattedInput } - } - } - /** * Handles the streaming response from the Responses API. 
* @@ -675,14 +462,14 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio try { const parsed = JSON.parse(data) - // Store response ID for conversation continuity - if (parsed.response?.id) { - this.resolveResponseId(parsed.response.id) - } // Capture resolved service tier if present if (parsed.response?.service_tier) { this.lastServiceTier = parsed.response.service_tier as ServiceTier } + // Capture complete output array (includes reasoning items with encrypted_content) + if (parsed.response?.output && Array.isArray(parsed.response.output)) { + this.lastResponseOutput = parsed.response.output + } // Delegate standard event types to the shared processor to avoid duplication if (parsed?.type && this.coreHandledEventTypes.has(parsed.type)) { @@ -970,14 +757,14 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio ) } } else if (parsed.type === "response.completed" || parsed.type === "response.done") { - // Store response ID for conversation continuity - if (parsed.response?.id) { - this.resolveResponseId(parsed.response.id) - } // Capture resolved service tier if present if (parsed.response?.service_tier) { this.lastServiceTier = parsed.response.service_tier as ServiceTier } + // Capture complete output array (includes reasoning items with encrypted_content) + if (parsed.response?.output && Array.isArray(parsed.response.output)) { + this.lastResponseOutput = parsed.response.output + } // Check if the done event contains the complete output (as a fallback) if ( @@ -1098,14 +885,14 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio * Shared processor for Responses API events. */ private async *processEvent(event: any, model: OpenAiNativeModel): ApiStream { - // Persist response id for conversation continuity when available - if (event?.response?.id) { - this.resolveResponseId(event.response.id) - } // Capture resolved service tier when available if (event?.response?.service_tier) { this.lastServiceTier = event.response.service_tier as ServiceTier } + // Capture complete output array (includes reasoning items with encrypted_content) + if (event?.response?.output && Array.isArray(event.response.output)) { + this.lastResponseOutput = event.response.output + } // Handle known streaming text deltas if (event?.type === "response.text.delta" || event?.type === "response.output_text.delta") { @@ -1251,21 +1038,25 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } /** - * Gets the last response ID captured from the Responses API stream. - * Used for maintaining conversation continuity across requests. - * @returns The response ID, or undefined if not available yet + * Extracts encrypted_content and id from the first reasoning item in the output array. + * This is the minimal data needed for stateless API continuity. + * + * @returns Object with encrypted_content and id, or undefined if not available */ - getLastResponseId(): string | undefined { - return this.lastResponseId - } + getEncryptedContent(): { encrypted_content: string; id?: string } | undefined { + if (!this.lastResponseOutput) return undefined - /** - * Sets the last response ID for conversation continuity. - * Typically only used in tests or special flows. 
- * @param responseId The response ID to store - */ - setResponseId(responseId: string): void { - this.lastResponseId = responseId + // Find the first reasoning item with encrypted_content + const reasoningItem = this.lastResponseOutput.find( + (item) => item.type === "reasoning" && item.encrypted_content, + ) + + if (!reasoningItem?.encrypted_content) return undefined + + return { + encrypted_content: reasoningItem.encrypted_content, + ...(reasoningItem.id ? { id: reasoningItem.id } : {}), + } } async completePrompt(prompt: string): Promise { @@ -1287,6 +1078,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio ], stream: false, // Non-streaming for completePrompt store: false, // Don't store prompt completions + include: ["reasoning.encrypted_content"], } // Include service tier if selected and supported diff --git a/src/core/task-persistence/__tests__/taskMessages.spec.ts b/src/core/task-persistence/__tests__/taskMessages.spec.ts index ecd6225692..98148d6ed6 100644 --- a/src/core/task-persistence/__tests__/taskMessages.spec.ts +++ b/src/core/task-persistence/__tests__/taskMessages.spec.ts @@ -33,9 +33,6 @@ describe("taskMessages.saveTaskMessages", () => { role: "assistant", content: "Hello", metadata: { - gpt5: { - previous_response_id: "resp_123", - }, other: "keep", }, }, diff --git a/src/core/task-persistence/apiMessages.ts b/src/core/task-persistence/apiMessages.ts index f846aaf13f..5beda00ddc 100644 --- a/src/core/task-persistence/apiMessages.ts +++ b/src/core/task-persistence/apiMessages.ts @@ -9,7 +9,15 @@ import { fileExistsAtPath } from "../../utils/fs" import { GlobalFileNames } from "../../shared/globalFileNames" import { getTaskDirectoryPath } from "../../utils/storage" -export type ApiMessage = Anthropic.MessageParam & { ts?: number; isSummary?: boolean } +export type ApiMessage = Anthropic.MessageParam & { + ts?: number + isSummary?: boolean + id?: string + // For reasoning items stored in API history + type?: "reasoning" + summary?: any[] + encrypted_content?: string +} export async function readApiMessages({ taskId, diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 5d63189e3d..2f0d2d9086 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -114,7 +114,7 @@ import { } from "../checkpoints" import { processUserContentMentions } from "../mentions/processUserContentMentions" import { getMessagesSinceLastSummary, summarizeConversation } from "../condense" -import { Gpt5Metadata, ClineMessageWithMetadata } from "./types" +import { ClineMessageWithMetadata } from "./types" import { MessageQueueService } from "../message-queue/MessageQueueService" import { AutoApprovalHandler, checkAutoApproval } from "../auto-approval" @@ -296,8 +296,6 @@ export class Task extends EventEmitter implements TaskLike { didAlreadyUseTool = false didCompleteReadingStream = false assistantMessageParser: AssistantMessageParser - private lastUsedInstructions?: string - private skipPrevResponseIdOnce: boolean = false // Token Usage Cache private tokenUsageSnapshot?: TokenUsage @@ -599,8 +597,39 @@ export class Task extends EventEmitter implements TaskLike { } private async addToApiConversationHistory(message: Anthropic.MessageParam) { - const messageWithTs = { ...message, ts: Date.now() } - this.apiConversationHistory.push(messageWithTs) + // Capture the encrypted_content from the provider (e.g., OpenAI Responses API) if present. + // We only persist data reported by the current response body. 
+ const handler = this.api as ApiHandler & { + getResponseId?: () => string | undefined + getEncryptedContent?: () => { encrypted_content: string; id?: string } | undefined + } + + if (message.role === "assistant") { + const responseId = handler.getResponseId?.() + const reasoningData = handler.getEncryptedContent?.() + + // If we have encrypted_content, add it as a reasoning item before the assistant message + if (reasoningData?.encrypted_content) { + this.apiConversationHistory.push({ + type: "reasoning", + summary: [], + encrypted_content: reasoningData.encrypted_content, + ...(reasoningData.id ? { id: reasoningData.id } : {}), + ts: Date.now(), + } as any) + } + + const messageWithTs = { + ...message, + ...(responseId ? { id: responseId } : {}), + ts: Date.now(), + } + this.apiConversationHistory.push(messageWithTs) + } else { + const messageWithTs = { ...message, ts: Date.now() } + this.apiConversationHistory.push(messageWithTs) + } + await this.saveApiConversationHistory() } @@ -647,18 +676,6 @@ export class Task extends EventEmitter implements TaskLike { public async overwriteClineMessages(newMessages: ClineMessage[]) { this.clineMessages = newMessages - - // If deletion or history truncation leaves a condense_context as the last message, - // ensure the next API call suppresses previous_response_id so the condensed context is respected. - try { - const last = this.clineMessages.at(-1) - if (last && last.type === "say" && last.say === "condense_context") { - this.skipPrevResponseIdOnce = true - } - } catch { - // non-fatal - } - restoreTodoListForTask(this) await this.saveClineMessages() } @@ -1089,9 +1106,6 @@ export class Task extends EventEmitter implements TaskLike { } await this.overwriteApiConversationHistory(messages) - // Set flag to skip previous_response_id on the next API call after manual condense - this.skipPrevResponseIdOnce = true - const contextCondense: ContextCondense = { summary, cost, newContextTokens, prevContextTokens } await this.say( "condense_context", @@ -1117,7 +1131,6 @@ export class Task extends EventEmitter implements TaskLike { progressStatus?: ToolProgressStatus, options: { isNonInteractive?: boolean - metadata?: Record } = {}, contextCondense?: ContextCondense, ): Promise { @@ -1155,7 +1168,6 @@ export class Task extends EventEmitter implements TaskLike { images, partial, contextCondense, - metadata: options.metadata, }) } } else { @@ -1171,14 +1183,6 @@ export class Task extends EventEmitter implements TaskLike { lastMessage.images = images lastMessage.partial = false lastMessage.progressStatus = progressStatus - if (options.metadata) { - // Add metadata to the message - const messageWithMetadata = lastMessage as ClineMessage & ClineMessageWithMetadata - if (!messageWithMetadata.metadata) { - messageWithMetadata.metadata = {} - } - Object.assign(messageWithMetadata.metadata, options.metadata) - } // Instead of streaming partialMessage events, we do a save // and post like normal to persist to disk. @@ -1201,7 +1205,6 @@ export class Task extends EventEmitter implements TaskLike { text, images, contextCondense, - metadata: options.metadata, }) } } @@ -1296,20 +1299,6 @@ export class Task extends EventEmitter implements TaskLike { const modifiedClineMessages = await this.getSavedClineMessages() - // Check for any stored GPT-5 response IDs in the message history. 
- const gpt5Messages = modifiedClineMessages.filter( - (m): m is ClineMessage & ClineMessageWithMetadata => - m.type === "say" && - m.say === "text" && - !!(m as ClineMessageWithMetadata).metadata?.gpt5?.previous_response_id, - ) - - if (gpt5Messages.length > 0) { - const lastGpt5Message = gpt5Messages[gpt5Messages.length - 1] - // The lastGpt5Message contains the previous_response_id that can be - // used for continuity. - } - // Remove any resume messages that may have been added before. const lastRelevantMessageIndex = findLastIndex( modifiedClineMessages, @@ -1720,10 +1709,6 @@ export class Task extends EventEmitter implements TaskLike { role: "user", content: [{ type: "text", text: `[new_task completed] Result: ${lastMessage}` }], }) - - // Set skipPrevResponseIdOnce to ensure the next API call sends the full conversation - // including the subtask result, not just from before the subtask was created - this.skipPrevResponseIdOnce = true } catch (error) { this.providerRef .deref() @@ -2377,7 +2362,6 @@ export class Task extends EventEmitter implements TaskLike { } } - await this.persistGpt5Metadata() await this.saveClineMessages() await this.providerRef.deref()?.postStateToWebview() @@ -2762,7 +2746,6 @@ export class Task extends EventEmitter implements TaskLike { Task.lastGlobalApiRequestTime = performance.now() const systemPrompt = await this.getSystemPrompt() - this.lastUsedInstructions = systemPrompt const { contextTokens } = this.getTokenUsage() if (contextTokens) { @@ -2800,10 +2783,6 @@ export class Task extends EventEmitter implements TaskLike { if (truncateResult.error) { await this.say("condense_context_error", truncateResult.error) } else if (truncateResult.summary) { - // A condense operation occurred; for the next GPT‑5 API call we should NOT - // send previous_response_id so the request reflects the fresh condensed context. - this.skipPrevResponseIdOnce = true - const { summary, cost, prevContextTokens, newContextTokens = 0 } = truncateResult const contextCondense: ContextCondense = { summary, cost, newContextTokens, prevContextTokens } await this.say( @@ -2820,8 +2799,20 @@ export class Task extends EventEmitter implements TaskLike { } const messagesSinceLastSummary = getMessagesSinceLastSummary(this.apiConversationHistory) - let cleanConversationHistory = maybeRemoveImageBlocks(messagesSinceLastSummary, this.api).map( - ({ role, content }) => ({ role, content }), + let cleanConversationHistory: any[] = maybeRemoveImageBlocks(messagesSinceLastSummary, this.api).map( + (msg: any) => { + // Pass through reasoning items as-is (including id if present) + if (msg.type === "reasoning") { + return { + type: msg.type, + summary: msg.summary, + encrypted_content: msg.encrypted_content, + ...(msg.id ? { id: msg.id } : {}), + } + } + // For regular messages, just return role and content + return { role: msg.role, content: msg.content } + }, ) // Check auto-approval limits @@ -2836,48 +2827,12 @@ export class Task extends EventEmitter implements TaskLike { throw new Error("Auto-approval limit reached and user did not approve continuation") } - // Determine GPT‑5 previous_response_id from last persisted assistant turn (if available), - // unless a condense just occurred (skip once after condense). 
- let previousResponseId: string | undefined = undefined - try { - const modelId = this.api.getModel().id - if (modelId && modelId.startsWith("gpt-5") && !this.skipPrevResponseIdOnce) { - // Find the last assistant message that has a previous_response_id stored - const idx = findLastIndex( - this.clineMessages, - (m): m is ClineMessage & ClineMessageWithMetadata => - m.type === "say" && - m.say === "text" && - !!(m as ClineMessageWithMetadata).metadata?.gpt5?.previous_response_id, - ) - if (idx !== -1) { - // Use the previous_response_id from the last assistant message for this request - const message = this.clineMessages[idx] as ClineMessage & ClineMessageWithMetadata - previousResponseId = message.metadata?.gpt5?.previous_response_id - } - } else if (this.skipPrevResponseIdOnce) { - // Skipping previous_response_id due to recent condense operation - will send full conversation context - } - } catch (error) { - console.error(`[Task#${this.taskId}] Error retrieving GPT-5 response ID:`, error) - // non-fatal - } - const metadata: ApiHandlerCreateMessageMetadata = { mode: mode, taskId: this.taskId, - // Only include previousResponseId if we're NOT suppressing it - ...(previousResponseId && !this.skipPrevResponseIdOnce ? { previousResponseId } : {}), - // If a condense just occurred, explicitly suppress continuity fallback for the next call - ...(this.skipPrevResponseIdOnce ? { suppressPreviousResponseId: true } : {}), - } - - // Reset skip flag after applying (it only affects the immediate next call) - if (this.skipPrevResponseIdOnce) { - this.skipPrevResponseIdOnce = false } - const stream = this.api.createMessage(systemPrompt, cleanConversationHistory, metadata) + const stream = this.api.createMessage(systemPrompt, cleanConversationHistory as any, metadata) const iterator = stream[Symbol.asyncIterator]() try { @@ -3080,41 +3035,6 @@ export class Task extends EventEmitter implements TaskLike { } } - /** - * Persist GPT-5 per-turn metadata (previous_response_id only) - * onto the last complete assistant say("text") message. - * - * Note: We do not persist system instructions or reasoning summaries. - */ - private async persistGpt5Metadata(): Promise { - try { - const modelId = this.api.getModel().id - if (!modelId || !modelId.startsWith("gpt-5")) return - - // Check if the API handler has a getLastResponseId method (OpenAiNativeHandler specific) - const handler = this.api as ApiHandler & { getLastResponseId?: () => string | undefined } - const lastResponseId = handler.getLastResponseId?.() - const idx = findLastIndex( - this.clineMessages, - (m) => m.type === "say" && m.say === "text" && m.partial !== true, - ) - if (idx !== -1) { - const msg = this.clineMessages[idx] as ClineMessage & ClineMessageWithMetadata - if (!msg.metadata) { - msg.metadata = {} - } - const gpt5Metadata: Gpt5Metadata = { - ...(msg.metadata.gpt5 ?? {}), - ...(lastResponseId ? 
{ previous_response_id: lastResponseId } : {}), - } - msg.metadata.gpt5 = gpt5Metadata - } - } catch (error) { - console.error(`[Task#${this.taskId}] Error persisting GPT-5 metadata:`, error) - // Non-fatal error in metadata persistence - } - } - // Getters public get taskStatus(): TaskStatus { diff --git a/src/core/task/__tests__/Task.spec.ts b/src/core/task/__tests__/Task.spec.ts index b4f2e04163..a78ae555d4 100644 --- a/src/core/task/__tests__/Task.spec.ts +++ b/src/core/task/__tests__/Task.spec.ts @@ -1624,7 +1624,7 @@ describe("Cline", () => { }) describe("Conversation continuity after condense and deletion", () => { - it("should set suppressPreviousResponseId when last message is condense_context", async () => { + it.skip("DEPRECATED - should set suppressPreviousResponseId when last message is condense_context", async () => { // Arrange: create task const task = new Task({ provider: mockProvider, @@ -1675,15 +1675,7 @@ describe("Cline", () => { const iterator = task.attemptApiRequest(0) await iterator.next() // read first chunk to ensure call happened - // Assert: metadata includes suppressPreviousResponseId set to true - expect(createMessageSpy).toHaveBeenCalled() - const callArgs = createMessageSpy.mock.calls[0] - // Args: [systemPrompt, cleanConversationHistory, metadata] - const metadata = callArgs?.[2] - expect(metadata?.suppressPreviousResponseId).toBe(true) - - // The skip flag should be reset after the call - expect((task as any).skipPrevResponseIdOnce).toBe(false) + // DEPRECATED: This test is for old suppressPreviousResponseId behavior }) }) describe("abortTask", () => { diff --git a/src/core/task/types.ts b/src/core/task/types.ts index e3641590a6..d94e6d10ce 100644 --- a/src/core/task/types.ts +++ b/src/core/task/types.ts @@ -3,23 +3,10 @@ */ /** - * GPT-5 specific metadata stored with assistant messages - * for maintaining conversation continuity across requests - */ -export interface Gpt5Metadata { - /** - * The response ID from the previous GPT-5 API response - * Used to maintain conversation continuity in subsequent requests - */ - previous_response_id?: string -} - -/** - * Extended ClineMessage type with GPT-5 metadata + * Extended ClineMessage type with metadata */ export interface ClineMessageWithMetadata { metadata?: { - gpt5?: Gpt5Metadata [key: string]: any } } From 79dadf91d060378d409844ed51e8632dd50e0b19 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Wed, 12 Nov 2025 11:39:27 -0700 Subject: [PATCH 2/6] fix(webview): remove unused metadata prop from ReasoningBlock render --- webview-ui/src/components/chat/ChatRow.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/webview-ui/src/components/chat/ChatRow.tsx b/webview-ui/src/components/chat/ChatRow.tsx index 4d446daeaa..aaaaaae09d 100644 --- a/webview-ui/src/components/chat/ChatRow.tsx +++ b/webview-ui/src/components/chat/ChatRow.tsx @@ -1062,7 +1062,6 @@ export const ChatRowContent = ({ ts={message.ts} isStreaming={isStreaming} isLast={isLast} - metadata={message.metadata as any} /> ) case "api_req_started": From 82468c31ee39ae1e3d4af3b96d120d79b5bf9ba1 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Wed, 12 Nov 2025 14:09:05 -0700 Subject: [PATCH 3/6] Responses API: retain response id for troubleshooting (not continuity) Continuity is stateless via encrypted reasoning items that we persist and replay. 
We now capture the top-level response id in OpenAiNativeHandler and persist the assistant message id into api_conversation_history.json solely for debugging/correlation with provider logs; it is not used for continuity or control flow. Also: silence request-body debug logging to avoid leaking prompts. --- src/api/providers/openai-native.ts | 22 +++++++++++++++++++++- src/core/task/Task.ts | 1 - 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/src/api/providers/openai-native.ts b/src/api/providers/openai-native.ts index de974c6f62..1cf195a3ba 100644 --- a/src/api/providers/openai-native.ts +++ b/src/api/providers/openai-native.ts @@ -38,6 +38,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio private lastServiceTier: ServiceTier | undefined // Complete response output array (includes reasoning items with encrypted_content) private lastResponseOutput: any[] | undefined + // Last top-level response id from Responses API (for troubleshooting) + private lastResponseId: string | undefined // Event types handled by the shared event processor to avoid duplication private readonly coreHandledEventTypes = new Set([ @@ -146,6 +148,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio this.lastServiceTier = undefined // Reset output array to capture current response output items this.lastResponseOutput = undefined + // Reset last response id for this request + this.lastResponseId = undefined // Use Responses API for ALL models const { verbosity, reasoning } = this.getModel() @@ -167,7 +171,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio ) // Temporary debug logging - console.log("[OpenAI Native] Request body:", requestBody) + // console.log("[OpenAI Native] Request body:", requestBody) // Make the request (pass systemPrompt and messages for potential retry) yield* this.executeRequest(requestBody, model, metadata, systemPrompt, messages) @@ -470,6 +474,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio if (parsed.response?.output && Array.isArray(parsed.response.output)) { this.lastResponseOutput = parsed.response.output } + // Capture top-level response id + if (parsed.response?.id) { + this.lastResponseId = parsed.response.id as string + } // Delegate standard event types to the shared processor to avoid duplication if (parsed?.type && this.coreHandledEventTypes.has(parsed.type)) { @@ -761,6 +769,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio if (parsed.response?.service_tier) { this.lastServiceTier = parsed.response.service_tier as ServiceTier } + // Capture top-level response id + if (parsed.response?.id) { + this.lastResponseId = parsed.response.id as string + } // Capture complete output array (includes reasoning items with encrypted_content) if (parsed.response?.output && Array.isArray(parsed.response.output)) { this.lastResponseOutput = parsed.response.output @@ -893,6 +905,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio if (event?.response?.output && Array.isArray(event.response.output)) { this.lastResponseOutput = event.response.output } + // Capture top-level response id + if (event?.response?.id) { + this.lastResponseId = event.response.id as string + } // Handle known streaming text deltas if (event?.type === "response.text.delta" || event?.type === "response.output_text.delta") { @@ -1059,6 +1075,10 @@ export class OpenAiNativeHandler extends BaseProvider 
implements SingleCompletio } } + getResponseId(): string | undefined { + return this.lastResponseId + } + async completePrompt(prompt: string): Promise { try { const model = this.getModel() diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 2f0d2d9086..0a5f6f0c31 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -114,7 +114,6 @@ import { } from "../checkpoints" import { processUserContentMentions } from "../mentions/processUserContentMentions" import { getMessagesSinceLastSummary, summarizeConversation } from "../condense" -import { ClineMessageWithMetadata } from "./types" import { MessageQueueService } from "../message-queue/MessageQueueService" import { AutoApprovalHandler, checkAutoApproval } from "../auto-approval" From 74fc9558b9567c92927234e5a84a80e84a371614 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Wed, 12 Nov 2025 14:20:32 -0700 Subject: [PATCH 4/6] remove DEPRECATED tests --- .../providers/__tests__/openai-native.spec.ts | 366 ------------------ src/core/task/__tests__/Task.spec.ts | 55 --- 2 files changed, 421 deletions(-) diff --git a/src/api/providers/__tests__/openai-native.spec.ts b/src/api/providers/__tests__/openai-native.spec.ts index b907dd1a91..405d275951 100644 --- a/src/api/providers/__tests__/openai-native.spec.ts +++ b/src/api/providers/__tests__/openai-native.spec.ts @@ -735,372 +735,6 @@ describe("OpenAiNativeHandler", () => { expect(textChunks[0].text).toBe("Hello") }) - it.skip("should use stored response ID when metadata doesn't provide one - DEPRECATED", async () => { - // Mock fetch for Responses API - const mockFetch = vitest - .fn() - .mockResolvedValueOnce({ - ok: true, - body: new ReadableStream({ - start(controller) { - // First response with ID - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.done","response":{"id":"resp_789","output":[{"type":"text","content":[{"type":"text","text":"First"}]}],"usage":{"prompt_tokens":10,"completion_tokens":1}}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - .mockResolvedValueOnce({ - ok: true, - body: new ReadableStream({ - start(controller) { - // Second response - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Second"}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - - // Mock SDK to fail - mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-5-2025-08-07", - }) - - // First request - establishes response ID - const stream1 = handler.createMessage(systemPrompt, messages) - for await (const chunk of stream1) { - // consume stream - } - - // Second request without metadata - should use stored response ID - const stream2 = handler.createMessage(systemPrompt, messages, { taskId: "test-task" }) - for await (const chunk of stream2) { - // consume stream - } - - // DEPRECATED: This test is for old previous_response_id behavior - }) - - it.skip("should retry with full conversation when previous_response_id fails - DEPRECATED", async () => { - // This test verifies the fix for context loss bug when previous_response_id becomes invalid - const mockFetch = vitest - .fn() - // First call: fails with 400 error about invalid previous_response_id - .mockResolvedValueOnce({ - ok: false, - status: 400, - text: async () 
=> JSON.stringify({ error: { message: "Previous response not found" } }), - }) - // Second call (retry): succeeds - .mockResolvedValueOnce({ - ok: true, - body: new ReadableStream({ - start(controller) { - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Retry successful"}}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.done","response":{"id":"resp_new","usage":{"prompt_tokens":100,"completion_tokens":2}}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - - // Mock SDK to fail - mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-5-2025-08-07", - }) - - // Prepare a multi-turn conversation - const conversationMessages: Anthropic.Messages.MessageParam[] = [ - { role: "user", content: "What is 2+2?" }, - { role: "assistant", content: "2+2 equals 4." }, - { role: "user", content: "What about 3+3?" }, - { role: "assistant", content: "3+3 equals 6." }, - { role: "user", content: "And 4+4?" }, // Latest message - ] - - // Call without previous_response_id - const stream = handler.createMessage(systemPrompt, conversationMessages, { - taskId: "test-task", - }) - - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - // Verify we got the successful response - const textChunks = chunks.filter((c) => c.type === "text") - expect(textChunks).toHaveLength(1) - expect(textChunks[0].text).toBe("Retry successful") - - // Verify two requests were made - expect(mockFetch).toHaveBeenCalledTimes(2) - - // First request: includes previous_response_id and only latest message - const firstCallBody = JSON.parse(mockFetch.mock.calls[0][1].body) - expect(firstCallBody.previous_response_id).toBe("resp_invalid") - expect(firstCallBody.input).toEqual([ - { - role: "user", - content: [{ type: "input_text", text: "And 4+4?" }], - }, - ]) - - // Second request (retry): NO previous_response_id, but FULL conversation history - const secondCallBody = JSON.parse(mockFetch.mock.calls[1][1].body) - expect(secondCallBody.previous_response_id).toBeUndefined() - expect(secondCallBody.instructions).toBe(systemPrompt) - // Should include the FULL conversation history - expect(secondCallBody.input).toEqual([ - { - role: "user", - content: [{ type: "input_text", text: "What is 2+2?" }], - }, - { - role: "assistant", - content: [{ type: "output_text", text: "2+2 equals 4." }], - }, - { - role: "user", - content: [{ type: "input_text", text: "What about 3+3?" }], - }, - { - role: "assistant", - content: [{ type: "output_text", text: "3+3 equals 6." }], - }, - { - role: "user", - content: [{ type: "input_text", text: "And 4+4?" 
}], - }, - ]) - }) - - it.skip("should retry with full conversation when SDK returns 400 for invalid previous_response_id - DEPRECATED", async () => { - // Test the SDK path (executeRequest method) for handling invalid previous_response_id - - // Mock SDK to return an async iterable that we can control - const createMockStream = (chunks: any[]) => { - return { - async *[Symbol.asyncIterator]() { - for (const chunk of chunks) { - yield chunk - } - }, - } - } - - // First call: SDK throws 400 error - mockResponsesCreate - .mockRejectedValueOnce({ - status: 400, - message: "Previous response resp_invalid not found", - }) - // Second call (retry): SDK succeeds with async iterable - .mockResolvedValueOnce( - createMockStream([ - { type: "response.text.delta", delta: "Context" }, - { type: "response.text.delta", delta: " preserved!" }, - { - type: "response.done", - response: { id: "resp_new", usage: { prompt_tokens: 150, completion_tokens: 2 } }, - }, - ]), - ) - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-5-2025-08-07", - }) - - // Prepare a conversation with context - const conversationMessages: Anthropic.Messages.MessageParam[] = [ - { role: "user", content: "Remember the number 42" }, - { role: "assistant", content: "I'll remember 42." }, - { role: "user", content: "What number did I ask you to remember?" }, - ] - - // Call without previous_response_id - const stream = handler.createMessage(systemPrompt, conversationMessages, { - taskId: "test-task", - }) - - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - // Verify we got the successful response - const textChunks = chunks.filter((c) => c.type === "text") - expect(textChunks).toHaveLength(2) - expect(textChunks[0].text).toBe("Context") - expect(textChunks[1].text).toBe(" preserved!") - - // Verify two SDK calls were made - expect(mockResponsesCreate).toHaveBeenCalledTimes(2) - - // First SDK call: includes previous_response_id and only latest message - const firstCallBody = mockResponsesCreate.mock.calls[0][0] - expect(firstCallBody.previous_response_id).toBe("resp_invalid") - expect(firstCallBody.input).toEqual([ - { - role: "user", - content: [{ type: "input_text", text: "What number did I ask you to remember?" }], - }, - ]) - - // Second SDK call (retry): NO previous_response_id, but FULL conversation history - const secondCallBody = mockResponsesCreate.mock.calls[1][0] - expect(secondCallBody.previous_response_id).toBeUndefined() - expect(secondCallBody.instructions).toBe(systemPrompt) - // Should include the FULL conversation history to preserve context - expect(secondCallBody.input).toEqual([ - { - role: "user", - content: [{ type: "input_text", text: "Remember the number 42" }], - }, - { - role: "assistant", - content: [{ type: "output_text", text: "I'll remember 42." }], - }, - { - role: "user", - content: [{ type: "input_text", text: "What number did I ask you to remember?" 
}], - }, - ]) - }) - - it.skip("should only send latest message when using previous_response_id - DEPRECATED", async () => { - // Mock fetch for Responses API - const mockFetch = vitest - .fn() - .mockResolvedValueOnce({ - ok: true, - body: new ReadableStream({ - start(controller) { - // First response with ID - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.done","response":{"id":"resp_001","output":[{"type":"text","content":[{"type":"text","text":"First"}]}],"usage":{"prompt_tokens":50,"completion_tokens":1}}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - .mockResolvedValueOnce({ - ok: true, - body: new ReadableStream({ - start(controller) { - // Second response - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Second"}}\n\n', - ), - ) - controller.enqueue( - new TextEncoder().encode( - 'data: {"type":"response.done","response":{"id":"resp_002","usage":{"prompt_tokens":10,"completion_tokens":1}}}\n\n', - ), - ) - controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) - controller.close() - }, - }), - }) - global.fetch = mockFetch as any - - // Mock SDK to fail - mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) - - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-5-2025-08-07", - }) - - // First request with full conversation - const firstMessages: Anthropic.Messages.MessageParam[] = [ - { role: "user", content: "Hello" }, - { role: "assistant", content: "Hi there!" }, - { role: "user", content: "How are you?" }, - ] - - const stream1 = handler.createMessage(systemPrompt, firstMessages) - for await (const chunk of stream1) { - // consume stream - } - - // Verify first request sends full conversation in structured format - let firstCallBody = JSON.parse(mockFetch.mock.calls[0][1].body) - expect(firstCallBody.instructions).toBe(systemPrompt) - expect(firstCallBody.input).toEqual([ - { - role: "user", - content: [{ type: "input_text", text: "Hello" }], - }, - { - role: "assistant", - content: [{ type: "output_text", text: "Hi there!" }], - }, - { - role: "user", - content: [{ type: "input_text", text: "How are you?" }], - }, - ]) - expect(firstCallBody.previous_response_id).toBeUndefined() - - // Second request with previous_response_id - should only send latest message - const secondMessages: Anthropic.Messages.MessageParam[] = [ - { role: "user", content: "Hello" }, - { role: "assistant", content: "Hi there!" }, - { role: "user", content: "How are you?" }, - { role: "assistant", content: "I'm doing well!" }, - { role: "user", content: "What's the weather?" }, // Latest message - ] - - const stream2 = handler.createMessage(systemPrompt, secondMessages, { - taskId: "test-task", - }) - for await (const chunk of stream2) { - // consume stream - } - - // Verify second request only sends the latest user message in structured format - let secondCallBody = JSON.parse(mockFetch.mock.calls[1][1].body) - expect(secondCallBody.input).toEqual([ - { - role: "user", - content: [{ type: "input_text", text: "What's the weather?" 
}], - }, - ]) - expect(secondCallBody.previous_response_id).toBe("resp_001") - }) - it("should format full conversation correctly", async () => { const mockFetch = vitest.fn().mockResolvedValue({ ok: true, diff --git a/src/core/task/__tests__/Task.spec.ts b/src/core/task/__tests__/Task.spec.ts index a78ae555d4..36492eebc9 100644 --- a/src/core/task/__tests__/Task.spec.ts +++ b/src/core/task/__tests__/Task.spec.ts @@ -1623,61 +1623,6 @@ describe("Cline", () => { }) }) - describe("Conversation continuity after condense and deletion", () => { - it.skip("DEPRECATED - should set suppressPreviousResponseId when last message is condense_context", async () => { - // Arrange: create task - const task = new Task({ - provider: mockProvider, - apiConfiguration: mockApiConfig, - task: "initial task", - startTask: false, - }) - - // Ensure provider state returns required fields for attemptApiRequest - mockProvider.getState = vi.fn().mockResolvedValue({ - apiConfiguration: mockApiConfig, - }) - - // Simulate deletion that leaves a condense_context as the last message - const condenseMsg = { - ts: Date.now(), - type: "say" as const, - say: "condense_context" as const, - contextCondense: { - summary: "summarized", - cost: 0.001, - prevContextTokens: 1200, - newContextTokens: 400, - }, - } - await task.overwriteClineMessages([condenseMsg]) - - // Spy and return a minimal successful stream to exercise attemptApiRequest - const mockStream = { - async *[Symbol.asyncIterator]() { - yield { type: "text", text: "ok" } - }, - async next() { - return { done: true, value: { type: "text", text: "ok" } } - }, - async return() { - return { done: true, value: undefined } - }, - async throw(e: any) { - throw e - }, - [Symbol.asyncDispose]: async () => {}, - } as AsyncGenerator - - const createMessageSpy = vi.spyOn(task.api, "createMessage").mockReturnValue(mockStream) - - // Act: initiate an API request - const iterator = task.attemptApiRequest(0) - await iterator.next() // read first chunk to ensure call happened - - // DEPRECATED: This test is for old suppressPreviousResponseId behavior - }) - }) describe("abortTask", () => { it("should set abort flag and emit TaskAborted event", async () => { const task = new Task({ From 9692add531893819abc3288cf449550d5bc5ccb7 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Wed, 12 Nov 2025 14:41:52 -0700 Subject: [PATCH 5/6] chore: remove unused Task types file to satisfy knip CI --- src/core/task/types.ts | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 src/core/task/types.ts diff --git a/src/core/task/types.ts b/src/core/task/types.ts deleted file mode 100644 index d94e6d10ce..0000000000 --- a/src/core/task/types.ts +++ /dev/null @@ -1,12 +0,0 @@ -/** - * Type definitions for Task-related metadata - */ - -/** - * Extended ClineMessage type with metadata - */ -export interface ClineMessageWithMetadata { - metadata?: { - [key: string]: any - } -} From 7bd50718e4f6b93ba11767e3f9d71dd056321666 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Wed, 12 Nov 2025 15:43:59 -0700 Subject: [PATCH 6/6] fix(task): properly type cleanConversationHistory and createMessage args in Task to address Dan's review --- src/core/task/Task.ts | 46 +++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 0a5f6f0c31..4f2bdd72da 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -2797,22 +2797,33 @@ export class Task extends EventEmitter implements TaskLike { } } + 
// Properly type cleaned conversation history to include either standard Anthropic messages + // or provider-specific reasoning items (for encrypted continuity). + type ReasoningItemForRequest = { + type: "reasoning" + encrypted_content: string + id?: string + summary?: any[] + } + type CleanConversationMessage = Anthropic.Messages.MessageParam | ReasoningItemForRequest + const messagesSinceLastSummary = getMessagesSinceLastSummary(this.apiConversationHistory) - let cleanConversationHistory: any[] = maybeRemoveImageBlocks(messagesSinceLastSummary, this.api).map( - (msg: any) => { - // Pass through reasoning items as-is (including id if present) - if (msg.type === "reasoning") { - return { - type: msg.type, - summary: msg.summary, - encrypted_content: msg.encrypted_content, - ...(msg.id ? { id: msg.id } : {}), - } + const cleanConversationHistory: CleanConversationMessage[] = maybeRemoveImageBlocks( + messagesSinceLastSummary, + this.api, + ).map((msg: ApiMessage): CleanConversationMessage => { + // Pass through reasoning items as-is (including id if present) + if (msg.type === "reasoning") { + return { + type: "reasoning", + summary: msg.summary, + encrypted_content: msg.encrypted_content!, + ...(msg.id ? { id: msg.id } : {}), } - // For regular messages, just return role and content - return { role: msg.role, content: msg.content } - }, - ) + } + // For regular messages, just return role and content + return { role: msg.role!, content: msg.content as Anthropic.Messages.ContentBlockParam[] | string } + }) // Check auto-approval limits const approvalResult = await this.autoApprovalHandler.checkAutoApprovalLimits( @@ -2831,7 +2842,12 @@ export class Task extends EventEmitter implements TaskLike { taskId: this.taskId, } - const stream = this.api.createMessage(systemPrompt, cleanConversationHistory as any, metadata) + // The provider accepts reasoning items alongside standard messages; cast to the expected parameter type. + const stream = this.api.createMessage( + systemPrompt, + cleanConversationHistory as unknown as Anthropic.Messages.MessageParam[], + metadata, + ) const iterator = stream[Symbol.asyncIterator]() try {
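// --- Illustrative sketch (not part of this patch) ---
// Hedged example of how the CleanConversationMessage union above is intended to behave:
// persisted reasoning items are replayed verbatim (encrypted_content plus optional id),
// while ordinary messages are reduced to role/content. The sample history values and the
// requestInput variable below are hypothetical; the types mirror the ones this change adds.
import Anthropic from "@anthropic-ai/sdk"

type ReasoningItemForRequest = {
	type: "reasoning"
	encrypted_content: string
	id?: string
	summary?: any[]
}
type CleanConversationMessage = Anthropic.Messages.MessageParam | ReasoningItemForRequest

// Hypothetical two-turn history: a user message followed by an encrypted reasoning item
// captured from an earlier turn and persisted into API history.
const history: CleanConversationMessage[] = [
	{ role: "user", content: "What is 2+2?" },
	{ type: "reasoning", encrypted_content: "gAAAA...", id: "rs_abc123" },
]

// Reasoning items pass through untouched; regular messages keep only role and content.
const requestInput = history.map((msg) => {
	if ("type" in msg) {
		return { type: "reasoning" as const, encrypted_content: msg.encrypted_content, ...(msg.id ? { id: msg.id } : {}) }
	}
	return { role: msg.role, content: msg.content }
})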