From 8d4a77fa5b0038b7d814099c7cea947691fa0cc2 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 5 Sep 2025 14:25:54 -0500 Subject: [PATCH] fix: preserve context by retrying with full conversation on invalid previous_response_id When the OpenAI Responses API returns a 400 error due to an invalid or expired previous_response_id, the code now properly re-prepares the full conversation history for the retry instead of just removing the ID and sending only the latest message. This fixes a critical bug where conversation context was completely lost when continuity failed, leading to degraded responses. Changes: - Modified executeRequest() to re-prepare input with full conversation on retry - Modified makeGpt5ResponsesAPIRequest() with the same fix for SSE fallback - Added comprehensive test coverage for both SDK and SSE retry paths - Tests verify retry sends full conversation, not just latest message --- .../providers/__tests__/openai-native.spec.ts | 202 ++++++++++++++++++ src/api/providers/openai-native.ts | 50 +++-- 2 files changed, 238 insertions(+), 14 deletions(-) diff --git a/src/api/providers/__tests__/openai-native.spec.ts b/src/api/providers/__tests__/openai-native.spec.ts index 97499acce33c..618cdeac659b 100644 --- a/src/api/providers/__tests__/openai-native.spec.ts +++ b/src/api/providers/__tests__/openai-native.spec.ts @@ -859,6 +859,208 @@ describe("OpenAiNativeHandler", () => { expect(secondCallBody.previous_response_id).toBe("resp_789") }) + it("should retry with full conversation when previous_response_id fails", async () => { + // This test verifies the fix for context loss bug when previous_response_id becomes invalid + const mockFetch = vitest + .fn() + // First call: fails with 400 error about invalid previous_response_id + .mockResolvedValueOnce({ + ok: false, + status: 400, + text: async () => JSON.stringify({ error: { message: "Previous response not found" } }), + }) + // Second call (retry): succeeds + .mockResolvedValueOnce({ + ok: true, + body: new ReadableStream({ + start(controller) { + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Retry successful"}}\n\n', + ), + ) + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.done","response":{"id":"resp_new","usage":{"prompt_tokens":100,"completion_tokens":2}}}\n\n', + ), + ) + controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) + controller.close() + }, + }), + }) + global.fetch = mockFetch as any + + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + + handler = new OpenAiNativeHandler({ + ...mockOptions, + apiModelId: "gpt-5-2025-08-07", + }) + + // Prepare a multi-turn conversation + const conversationMessages: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: "What is 2+2?" }, + { role: "assistant", content: "2+2 equals 4." }, + { role: "user", content: "What about 3+3?" }, + { role: "assistant", content: "3+3 equals 6." }, + { role: "user", content: "And 4+4?" }, // Latest message + ] + + // Call with a previous_response_id that will fail + const stream = handler.createMessage(systemPrompt, conversationMessages, { + taskId: "test-task", + previousResponseId: "resp_invalid", + }) + + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + // Verify we got the successful response + const textChunks = chunks.filter((c) => c.type === "text") + expect(textChunks).toHaveLength(1) + expect(textChunks[0].text).toBe("Retry successful") + + // Verify two requests were made + expect(mockFetch).toHaveBeenCalledTimes(2) + + // First request: includes previous_response_id and only latest message + const firstCallBody = JSON.parse(mockFetch.mock.calls[0][1].body) + expect(firstCallBody.previous_response_id).toBe("resp_invalid") + expect(firstCallBody.input).toEqual([ + { + role: "user", + content: [{ type: "input_text", text: "And 4+4?" }], + }, + ]) + + // Second request (retry): NO previous_response_id, but FULL conversation history + const secondCallBody = JSON.parse(mockFetch.mock.calls[1][1].body) + expect(secondCallBody.previous_response_id).toBeUndefined() + expect(secondCallBody.instructions).toBe(systemPrompt) + // Should include the FULL conversation history + expect(secondCallBody.input).toEqual([ + { + role: "user", + content: [{ type: "input_text", text: "What is 2+2?" }], + }, + { + role: "assistant", + content: [{ type: "output_text", text: "2+2 equals 4." }], + }, + { + role: "user", + content: [{ type: "input_text", text: "What about 3+3?" }], + }, + { + role: "assistant", + content: [{ type: "output_text", text: "3+3 equals 6." }], + }, + { + role: "user", + content: [{ type: "input_text", text: "And 4+4?" }], + }, + ]) + }) + + it("should retry with full conversation when SDK returns 400 for invalid previous_response_id", async () => { + // Test the SDK path (executeRequest method) for handling invalid previous_response_id + + // Mock SDK to return an async iterable that we can control + const createMockStream = (chunks: any[]) => { + return { + async *[Symbol.asyncIterator]() { + for (const chunk of chunks) { + yield chunk + } + }, + } + } + + // First call: SDK throws 400 error + mockResponsesCreate + .mockRejectedValueOnce({ + status: 400, + message: "Previous response resp_invalid not found", + }) + // Second call (retry): SDK succeeds with async iterable + .mockResolvedValueOnce( + createMockStream([ + { type: "response.text.delta", delta: "Context" }, + { type: "response.text.delta", delta: " preserved!" }, + { + type: "response.done", + response: { id: "resp_new", usage: { prompt_tokens: 150, completion_tokens: 2 } }, + }, + ]), + ) + + handler = new OpenAiNativeHandler({ + ...mockOptions, + apiModelId: "gpt-5-2025-08-07", + }) + + // Prepare a conversation with context + const conversationMessages: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: "Remember the number 42" }, + { role: "assistant", content: "I'll remember 42." }, + { role: "user", content: "What number did I ask you to remember?" }, + ] + + // Call with a previous_response_id that will fail + const stream = handler.createMessage(systemPrompt, conversationMessages, { + taskId: "test-task", + previousResponseId: "resp_invalid", + }) + + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + // Verify we got the successful response + const textChunks = chunks.filter((c) => c.type === "text") + expect(textChunks).toHaveLength(2) + expect(textChunks[0].text).toBe("Context") + expect(textChunks[1].text).toBe(" preserved!") + + // Verify two SDK calls were made + expect(mockResponsesCreate).toHaveBeenCalledTimes(2) + + // First SDK call: includes previous_response_id and only latest message + const firstCallBody = mockResponsesCreate.mock.calls[0][0] + expect(firstCallBody.previous_response_id).toBe("resp_invalid") + expect(firstCallBody.input).toEqual([ + { + role: "user", + content: [{ type: "input_text", text: "What number did I ask you to remember?" }], + }, + ]) + + // Second SDK call (retry): NO previous_response_id, but FULL conversation history + const secondCallBody = mockResponsesCreate.mock.calls[1][0] + expect(secondCallBody.previous_response_id).toBeUndefined() + expect(secondCallBody.instructions).toBe(systemPrompt) + // Should include the FULL conversation history to preserve context + expect(secondCallBody.input).toEqual([ + { + role: "user", + content: [{ type: "input_text", text: "Remember the number 42" }], + }, + { + role: "assistant", + content: [{ type: "output_text", text: "I'll remember 42." }], + }, + { + role: "user", + content: [{ type: "input_text", text: "What number did I ask you to remember?" }], + }, + ]) + }) + it("should only send latest message when using previous_response_id", async () => { // Mock fetch for Responses API const mockFetch = vitest diff --git a/src/api/providers/openai-native.ts b/src/api/providers/openai-native.ts index d7c7eb834828..8a205a06b453 100644 --- a/src/api/providers/openai-native.ts +++ b/src/api/providers/openai-native.ts @@ -217,8 +217,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio metadata, ) - // Make the request - yield* this.executeRequest(requestBody, model, metadata) + // Make the request (pass systemPrompt and messages for potential retry) + yield* this.executeRequest(requestBody, model, metadata, systemPrompt, messages) } private buildRequestBody( @@ -297,6 +297,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio requestBody: any, model: OpenAiNativeModel, metadata?: ApiHandlerCreateMessageMetadata, + systemPrompt?: string, + messages?: Anthropic.Messages.MessageParam[], ): ApiStream { try { // Use the official SDK @@ -323,13 +325,19 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio if (is400Error && requestBody.previous_response_id && isPreviousResponseError) { // Log the error and retry without the previous_response_id - // Remove the problematic previous_response_id and retry - const retryRequestBody = { ...requestBody } - delete retryRequestBody.previous_response_id - // Clear the stored lastResponseId to prevent using it again this.lastResponseId = undefined + // Re-prepare the full conversation without previous_response_id + let retryRequestBody = { ...requestBody } + delete retryRequestBody.previous_response_id + + // If we have the original messages, re-prepare the full conversation + if (systemPrompt && messages) { + const { formattedInput } = this.prepareStructuredInput(systemPrompt, messages, undefined) + retryRequestBody.input = formattedInput + } + try { // Retry with the SDK const retryStream = (await (this.client as any).responses.create( @@ -338,7 +346,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio if (typeof (retryStream as any)[Symbol.asyncIterator] !== "function") { // If SDK fails, fall back to SSE - yield* this.makeGpt5ResponsesAPIRequest(retryRequestBody, model, metadata) + yield* this.makeGpt5ResponsesAPIRequest( + retryRequestBody, + model, + metadata, + systemPrompt, + messages, + ) return } @@ -350,13 +364,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio return } catch (retryErr) { // If retry also fails, fall back to SSE - yield* this.makeGpt5ResponsesAPIRequest(retryRequestBody, model, metadata) + yield* this.makeGpt5ResponsesAPIRequest(retryRequestBody, model, metadata, systemPrompt, messages) return } } // For other errors, fallback to manual SSE via fetch - yield* this.makeGpt5ResponsesAPIRequest(requestBody, model, metadata) + yield* this.makeGpt5ResponsesAPIRequest(requestBody, model, metadata, systemPrompt, messages) } } @@ -445,6 +459,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio requestBody: any, model: OpenAiNativeModel, metadata?: ApiHandlerCreateMessageMetadata, + systemPrompt?: string, + messages?: Anthropic.Messages.MessageParam[], ): ApiStream { const apiKey = this.options.openAiNativeApiKey ?? "not-provided" const baseUrl = this.options.openAiNativeBaseUrl || "https://api.openai.com" @@ -489,16 +505,22 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio if (response.status === 400 && requestBody.previous_response_id && isPreviousResponseError) { // Log the error and retry without the previous_response_id - // Remove the problematic previous_response_id and retry - const retryRequestBody = { ...requestBody } - delete retryRequestBody.previous_response_id - // Clear the stored lastResponseId to prevent using it again this.lastResponseId = undefined // Resolve the promise once to unblock any waiting requests this.resolveResponseId(undefined) - // Retry the request without the previous_response_id + // Re-prepare the full conversation without previous_response_id + let retryRequestBody = { ...requestBody } + delete retryRequestBody.previous_response_id + + // If we have the original messages, re-prepare the full conversation + if (systemPrompt && messages) { + const { formattedInput } = this.prepareStructuredInput(systemPrompt, messages, undefined) + retryRequestBody.input = formattedInput + } + + // Retry the request with full conversation context const retryResponse = await fetch(url, { method: "POST", headers: {