Skip to content

Commit 8d4a77f

Browse files
committed
fix: preserve context by retrying with full conversation on invalid previous_response_id
When the OpenAI Responses API returns a 400 error due to an invalid or expired `previous_response_id`, the code now properly re-prepares the full conversation history for the retry instead of just removing the ID and sending only the latest message. This fixes a critical bug where conversation context was completely lost when continuity failed, leading to degraded responses.

Changes:
- Modified executeRequest() to re-prepare input with the full conversation on retry
- Modified makeGpt5ResponsesAPIRequest() with the same fix for the SSE fallback
- Added comprehensive test coverage for both SDK and SSE retry paths
- Tests verify the retry sends the full conversation, not just the latest message
1 parent c206da4 commit 8d4a77f

File tree

2 files changed

+238
-14
lines changed

2 files changed

+238
-14
lines changed

src/api/providers/__tests__/openai-native.spec.ts

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -859,6 +859,208 @@ describe("OpenAiNativeHandler", () => {
859859
expect(secondCallBody.previous_response_id).toBe("resp_789")
860860
})
861861

862+
it("should retry with full conversation when previous_response_id fails", async () => {
863+
// This test verifies the fix for context loss bug when previous_response_id becomes invalid
864+
const mockFetch = vitest
865+
.fn()
866+
// First call: fails with 400 error about invalid previous_response_id
867+
.mockResolvedValueOnce({
868+
ok: false,
869+
status: 400,
870+
text: async () => JSON.stringify({ error: { message: "Previous response not found" } }),
871+
})
872+
// Second call (retry): succeeds
873+
.mockResolvedValueOnce({
874+
ok: true,
875+
body: new ReadableStream({
876+
start(controller) {
877+
controller.enqueue(
878+
new TextEncoder().encode(
879+
'data: {"type":"response.output_item.added","item":{"type":"text","text":"Retry successful"}}\n\n',
880+
),
881+
)
882+
controller.enqueue(
883+
new TextEncoder().encode(
884+
'data: {"type":"response.done","response":{"id":"resp_new","usage":{"prompt_tokens":100,"completion_tokens":2}}}\n\n',
885+
),
886+
)
887+
controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n"))
888+
controller.close()
889+
},
890+
}),
891+
})
892+
global.fetch = mockFetch as any
893+
894+
// Mock SDK to fail
895+
mockResponsesCreate.mockRejectedValue(new Error("SDK not available"))
896+
897+
handler = new OpenAiNativeHandler({
898+
...mockOptions,
899+
apiModelId: "gpt-5-2025-08-07",
900+
})
901+
902+
// Prepare a multi-turn conversation
903+
const conversationMessages: Anthropic.Messages.MessageParam[] = [
904+
{ role: "user", content: "What is 2+2?" },
905+
{ role: "assistant", content: "2+2 equals 4." },
906+
{ role: "user", content: "What about 3+3?" },
907+
{ role: "assistant", content: "3+3 equals 6." },
908+
{ role: "user", content: "And 4+4?" }, // Latest message
909+
]
910+
911+
// Call with a previous_response_id that will fail
912+
const stream = handler.createMessage(systemPrompt, conversationMessages, {
913+
taskId: "test-task",
914+
previousResponseId: "resp_invalid",
915+
})
916+
917+
const chunks: any[] = []
918+
for await (const chunk of stream) {
919+
chunks.push(chunk)
920+
}
921+
922+
// Verify we got the successful response
923+
const textChunks = chunks.filter((c) => c.type === "text")
924+
expect(textChunks).toHaveLength(1)
925+
expect(textChunks[0].text).toBe("Retry successful")
926+
927+
// Verify two requests were made
928+
expect(mockFetch).toHaveBeenCalledTimes(2)
929+
930+
// First request: includes previous_response_id and only latest message
931+
const firstCallBody = JSON.parse(mockFetch.mock.calls[0][1].body)
932+
expect(firstCallBody.previous_response_id).toBe("resp_invalid")
933+
expect(firstCallBody.input).toEqual([
934+
{
935+
role: "user",
936+
content: [{ type: "input_text", text: "And 4+4?" }],
937+
},
938+
])
939+
940+
// Second request (retry): NO previous_response_id, but FULL conversation history
941+
const secondCallBody = JSON.parse(mockFetch.mock.calls[1][1].body)
942+
expect(secondCallBody.previous_response_id).toBeUndefined()
943+
expect(secondCallBody.instructions).toBe(systemPrompt)
944+
// Should include the FULL conversation history
945+
expect(secondCallBody.input).toEqual([
946+
{
947+
role: "user",
948+
content: [{ type: "input_text", text: "What is 2+2?" }],
949+
},
950+
{
951+
role: "assistant",
952+
content: [{ type: "output_text", text: "2+2 equals 4." }],
953+
},
954+
{
955+
role: "user",
956+
content: [{ type: "input_text", text: "What about 3+3?" }],
957+
},
958+
{
959+
role: "assistant",
960+
content: [{ type: "output_text", text: "3+3 equals 6." }],
961+
},
962+
{
963+
role: "user",
964+
content: [{ type: "input_text", text: "And 4+4?" }],
965+
},
966+
])
967+
})
968+
969+
it("should retry with full conversation when SDK returns 400 for invalid previous_response_id", async () => {
970+
// Test the SDK path (executeRequest method) for handling invalid previous_response_id
971+
972+
// Mock SDK to return an async iterable that we can control
973+
const createMockStream = (chunks: any[]) => {
974+
return {
975+
async *[Symbol.asyncIterator]() {
976+
for (const chunk of chunks) {
977+
yield chunk
978+
}
979+
},
980+
}
981+
}
982+
983+
// First call: SDK throws 400 error
984+
mockResponsesCreate
985+
.mockRejectedValueOnce({
986+
status: 400,
987+
message: "Previous response resp_invalid not found",
988+
})
989+
// Second call (retry): SDK succeeds with async iterable
990+
.mockResolvedValueOnce(
991+
createMockStream([
992+
{ type: "response.text.delta", delta: "Context" },
993+
{ type: "response.text.delta", delta: " preserved!" },
994+
{
995+
type: "response.done",
996+
response: { id: "resp_new", usage: { prompt_tokens: 150, completion_tokens: 2 } },
997+
},
998+
]),
999+
)
1000+
1001+
handler = new OpenAiNativeHandler({
1002+
...mockOptions,
1003+
apiModelId: "gpt-5-2025-08-07",
1004+
})
1005+
1006+
// Prepare a conversation with context
1007+
const conversationMessages: Anthropic.Messages.MessageParam[] = [
1008+
{ role: "user", content: "Remember the number 42" },
1009+
{ role: "assistant", content: "I'll remember 42." },
1010+
{ role: "user", content: "What number did I ask you to remember?" },
1011+
]
1012+
1013+
// Call with a previous_response_id that will fail
1014+
const stream = handler.createMessage(systemPrompt, conversationMessages, {
1015+
taskId: "test-task",
1016+
previousResponseId: "resp_invalid",
1017+
})
1018+
1019+
const chunks: any[] = []
1020+
for await (const chunk of stream) {
1021+
chunks.push(chunk)
1022+
}
1023+
1024+
// Verify we got the successful response
1025+
const textChunks = chunks.filter((c) => c.type === "text")
1026+
expect(textChunks).toHaveLength(2)
1027+
expect(textChunks[0].text).toBe("Context")
1028+
expect(textChunks[1].text).toBe(" preserved!")
1029+
1030+
// Verify two SDK calls were made
1031+
expect(mockResponsesCreate).toHaveBeenCalledTimes(2)
1032+
1033+
// First SDK call: includes previous_response_id and only latest message
1034+
const firstCallBody = mockResponsesCreate.mock.calls[0][0]
1035+
expect(firstCallBody.previous_response_id).toBe("resp_invalid")
1036+
expect(firstCallBody.input).toEqual([
1037+
{
1038+
role: "user",
1039+
content: [{ type: "input_text", text: "What number did I ask you to remember?" }],
1040+
},
1041+
])
1042+
1043+
// Second SDK call (retry): NO previous_response_id, but FULL conversation history
1044+
const secondCallBody = mockResponsesCreate.mock.calls[1][0]
1045+
expect(secondCallBody.previous_response_id).toBeUndefined()
1046+
expect(secondCallBody.instructions).toBe(systemPrompt)
1047+
// Should include the FULL conversation history to preserve context
1048+
expect(secondCallBody.input).toEqual([
1049+
{
1050+
role: "user",
1051+
content: [{ type: "input_text", text: "Remember the number 42" }],
1052+
},
1053+
{
1054+
role: "assistant",
1055+
content: [{ type: "output_text", text: "I'll remember 42." }],
1056+
},
1057+
{
1058+
role: "user",
1059+
content: [{ type: "input_text", text: "What number did I ask you to remember?" }],
1060+
},
1061+
])
1062+
})
1063+
8621064
it("should only send latest message when using previous_response_id", async () => {
8631065
// Mock fetch for Responses API
8641066
const mockFetch = vitest

src/api/providers/openai-native.ts

Lines changed: 36 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -217,8 +217,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
217217
metadata,
218218
)
219219

220-
// Make the request
221-
yield* this.executeRequest(requestBody, model, metadata)
220+
// Make the request (pass systemPrompt and messages for potential retry)
221+
yield* this.executeRequest(requestBody, model, metadata, systemPrompt, messages)
222222
}
223223

224224
private buildRequestBody(
@@ -297,6 +297,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
297297
requestBody: any,
298298
model: OpenAiNativeModel,
299299
metadata?: ApiHandlerCreateMessageMetadata,
300+
systemPrompt?: string,
301+
messages?: Anthropic.Messages.MessageParam[],
300302
): ApiStream {
301303
try {
302304
// Use the official SDK
@@ -323,13 +325,19 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
323325
if (is400Error && requestBody.previous_response_id && isPreviousResponseError) {
324326
// Log the error and retry without the previous_response_id
325327

326-
// Remove the problematic previous_response_id and retry
327-
const retryRequestBody = { ...requestBody }
328-
delete retryRequestBody.previous_response_id
329-
330328
// Clear the stored lastResponseId to prevent using it again
331329
this.lastResponseId = undefined
332330

331+
// Re-prepare the full conversation without previous_response_id
332+
let retryRequestBody = { ...requestBody }
333+
delete retryRequestBody.previous_response_id
334+
335+
// If we have the original messages, re-prepare the full conversation
336+
if (systemPrompt && messages) {
337+
const { formattedInput } = this.prepareStructuredInput(systemPrompt, messages, undefined)
338+
retryRequestBody.input = formattedInput
339+
}
340+
333341
try {
334342
// Retry with the SDK
335343
const retryStream = (await (this.client as any).responses.create(
@@ -338,7 +346,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
338346

339347
if (typeof (retryStream as any)[Symbol.asyncIterator] !== "function") {
340348
// If SDK fails, fall back to SSE
341-
yield* this.makeGpt5ResponsesAPIRequest(retryRequestBody, model, metadata)
349+
yield* this.makeGpt5ResponsesAPIRequest(
350+
retryRequestBody,
351+
model,
352+
metadata,
353+
systemPrompt,
354+
messages,
355+
)
342356
return
343357
}
344358

@@ -350,13 +364,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
350364
return
351365
} catch (retryErr) {
352366
// If retry also fails, fall back to SSE
353-
yield* this.makeGpt5ResponsesAPIRequest(retryRequestBody, model, metadata)
367+
yield* this.makeGpt5ResponsesAPIRequest(retryRequestBody, model, metadata, systemPrompt, messages)
354368
return
355369
}
356370
}
357371

358372
// For other errors, fallback to manual SSE via fetch
359-
yield* this.makeGpt5ResponsesAPIRequest(requestBody, model, metadata)
373+
yield* this.makeGpt5ResponsesAPIRequest(requestBody, model, metadata, systemPrompt, messages)
360374
}
361375
}
362376

@@ -445,6 +459,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
445459
requestBody: any,
446460
model: OpenAiNativeModel,
447461
metadata?: ApiHandlerCreateMessageMetadata,
462+
systemPrompt?: string,
463+
messages?: Anthropic.Messages.MessageParam[],
448464
): ApiStream {
449465
const apiKey = this.options.openAiNativeApiKey ?? "not-provided"
450466
const baseUrl = this.options.openAiNativeBaseUrl || "https://api.openai.com"
@@ -489,16 +505,22 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
489505
if (response.status === 400 && requestBody.previous_response_id && isPreviousResponseError) {
490506
// Log the error and retry without the previous_response_id
491507

492-
// Remove the problematic previous_response_id and retry
493-
const retryRequestBody = { ...requestBody }
494-
delete retryRequestBody.previous_response_id
495-
496508
// Clear the stored lastResponseId to prevent using it again
497509
this.lastResponseId = undefined
498510
// Resolve the promise once to unblock any waiting requests
499511
this.resolveResponseId(undefined)
500512

501-
// Retry the request without the previous_response_id
513+
// Re-prepare the full conversation without previous_response_id
514+
let retryRequestBody = { ...requestBody }
515+
delete retryRequestBody.previous_response_id
516+
517+
// If we have the original messages, re-prepare the full conversation
518+
if (systemPrompt && messages) {
519+
const { formattedInput } = this.prepareStructuredInput(systemPrompt, messages, undefined)
520+
retryRequestBody.input = formattedInput
521+
}
522+
523+
// Retry the request with full conversation context
502524
const retryResponse = await fetch(url, {
503525
method: "POST",
504526
headers: {

0 commit comments

Comments (0)