Skip to content

Commit 8d4a77f

Browse files
committed
fix: preserve context by retrying with full conversation on invalid previous_response_id
When the OpenAI Responses API returns a 400 error due to an invalid or expired `previous_response_id`, the code now properly re-prepares the full conversation history for the retry instead of just removing the ID and sending only the latest message. This fixes a critical bug where conversation context was completely lost when continuity failed, leading to degraded responses.

Changes:
- Modified executeRequest() to re-prepare input with the full conversation on retry
- Modified makeGpt5ResponsesAPIRequest() with the same fix for the SSE fallback
- Added comprehensive test coverage for both SDK and SSE retry paths
- Tests verify the retry sends the full conversation, not just the latest message
1 parent c206da4 commit 8d4a77f

File tree

2 files changed

+238
-14
lines changed

2 files changed

+238
-14
lines changed

src/api/providers/__tests__/openai-native.spec.ts

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -859,6 +859,208 @@ describe("OpenAiNativeHandler", () => {
859859
expect(secondCallBody.previous_response_id).toBe("resp_789")
860860
})
861861

862+
it("should retry with full conversation when previous_response_id fails", async () => {
863+
// This test verifies the fix for context loss bug when previous_response_id becomes invalid
864+
const mockFetch = vitest
865+
.fn()
866+
// First call: fails with 400 error about invalid previous_response_id
867+
.mockResolvedValueOnce({
868+
ok: false,
869+
status: 400,
870+
text: async () => JSON.stringify({ error: { message: "Previous response not found" } }),
871+
})
872+
// Second call (retry): succeeds
873+
.mockResolvedValueOnce({
874+
ok: true,
875+
body: new ReadableStream({
876+
start(controller) {
877+
controller.enqueue(
878+
new TextEncoder().encode(
879+
'data: {"type":"response.output_item.added","item":{"type":"text","text":"Retry successful"}}\n\n',
880+
),
881+
)
882+
controller.enqueue(
883+
new TextEncoder().encode(
884+
'data: {"type":"response.done","response":{"id":"resp_new","usage":{"prompt_tokens":100,"completion_tokens":2}}}\n\n',
885+
),
886+
)
887+
controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n"))
888+
controller.close()
889+
},
890+
}),
891+
})
892+
global.fetch = mockFetch as any
893+
894+
// Mock SDK to fail
895+
mockResponsesCreate.mockRejectedValue(new Error("SDK not available"))
896+
897+
handler = new OpenAiNativeHandler({
898+
...mockOptions,
899+
apiModelId: "gpt-5-2025-08-07",
900+
})
901+
902+
// Prepare a multi-turn conversation
903+
const conversationMessages: Anthropic.Messages.MessageParam[] = [
904+
{ role: "user", content: "What is 2+2?" },
905+
{ role: "assistant", content: "2+2 equals 4." },
906+
{ role: "user", content: "What about 3+3?" },
907+
{ role: "assistant", content: "3+3 equals 6." },
908+
{ role: "user", content: "And 4+4?" }, // Latest message
909+
]
910+
911+
// Call with a previous_response_id that will fail
912+
const stream = handler.createMessage(systemPrompt, conversationMessages, {
913+
taskId: "test-task",
914+
previousResponseId: "resp_invalid",
915+
})
916+
917+
const chunks: any[] = []
918+
for await (const chunk of stream) {
919+
chunks.push(chunk)
920+
}
921+
922+
// Verify we got the successful response
923+
const textChunks = chunks.filter((c) => c.type === "text")
924+
expect(textChunks).toHaveLength(1)
925+
expect(textChunks[0].text).toBe("Retry successful")
926+
927+
// Verify two requests were made
928+
expect(mockFetch).toHaveBeenCalledTimes(2)
929+
930+
// First request: includes previous_response_id and only latest message
931+
const firstCallBody = JSON.parse(mockFetch.mock.calls[0][1].body)
932+
expect(firstCallBody.previous_response_id).toBe("resp_invalid")
933+
expect(firstCallBody.input).toEqual([
934+
{
935+
role: "user",
936+
content: [{ type: "input_text", text: "And 4+4?" }],
937+
},
938+
])
939+
940+
// Second request (retry): NO previous_response_id, but FULL conversation history
941+
const secondCallBody = JSON.parse(mockFetch.mock.calls[1][1].body)
942+
expect(secondCallBody.previous_response_id).toBeUndefined()
943+
expect(secondCallBody.instructions).toBe(systemPrompt)
944+
// Should include the FULL conversation history
945+
expect(secondCallBody.input).toEqual([
946+
{
947+
role: "user",
948+
content: [{ type: "input_text", text: "What is 2+2?" }],
949+
},
950+
{
951+
role: "assistant",
952+
content: [{ type: "output_text", text: "2+2 equals 4." }],
953+
},
954+
{
955+
role: "user",
956+
content: [{ type: "input_text", text: "What about 3+3?" }],
957+
},
958+
{
959+
role: "assistant",
960+
content: [{ type: "output_text", text: "3+3 equals 6." }],
961+
},
962+
{
963+
role: "user",
964+
content: [{ type: "input_text", text: "And 4+4?" }],
965+
},
966+
])
967+
})
968+
969+
it("should retry with full conversation when SDK returns 400 for invalid previous_response_id", async () => {
970+
// Test the SDK path (executeRequest method) for handling invalid previous_response_id
971+
972+
// Mock SDK to return an async iterable that we can control
973+
const createMockStream = (chunks: any[]) => {
974+
return {
975+
async *[Symbol.asyncIterator]() {
976+
for (const chunk of chunks) {
977+
yield chunk
978+
}
979+
},
980+
}
981+
}
982+
983+
// First call: SDK throws 400 error
984+
mockResponsesCreate
985+
.mockRejectedValueOnce({
986+
status: 400,
987+
message: "Previous response resp_invalid not found",
988+
})
989+
// Second call (retry): SDK succeeds with async iterable
990+
.mockResolvedValueOnce(
991+
createMockStream([
992+
{ type: "response.text.delta", delta: "Context" },
993+
{ type: "response.text.delta", delta: " preserved!" },
994+
{
995+
type: "response.done",
996+
response: { id: "resp_new", usage: { prompt_tokens: 150, completion_tokens: 2 } },
997+
},
998+
]),
999+
)
1000+
1001+
handler = new OpenAiNativeHandler({
1002+
...mockOptions,
1003+
apiModelId: "gpt-5-2025-08-07",
1004+
})
1005+
1006+
// Prepare a conversation with context
1007+
const conversationMessages: Anthropic.Messages.MessageParam[] = [
1008+
{ role: "user", content: "Remember the number 42" },
1009+
{ role: "assistant", content: "I'll remember 42." },
1010+
{ role: "user", content: "What number did I ask you to remember?" },
1011+
]
1012+
1013+
// Call with a previous_response_id that will fail
1014+
const stream = handler.createMessage(systemPrompt, conversationMessages, {
1015+
taskId: "test-task",
1016+
previousResponseId: "resp_invalid",
1017+
})
1018+
1019+
const chunks: any[] = []
1020+
for await (const chunk of stream) {
1021+
chunks.push(chunk)
1022+
}
1023+
1024+
// Verify we got the successful response
1025+
const textChunks = chunks.filter((c) => c.type === "text")
1026+
expect(textChunks).toHaveLength(2)
1027+
expect(textChunks[0].text).toBe("Context")
1028+
expect(textChunks[1].text).toBe(" preserved!")
1029+
1030+
// Verify two SDK calls were made
1031+
expect(mockResponsesCreate).toHaveBeenCalledTimes(2)
1032+
1033+
// First SDK call: includes previous_response_id and only latest message
1034+
const firstCallBody = mockResponsesCreate.mock.calls[0][0]
1035+
expect(firstCallBody.previous_response_id).toBe("resp_invalid")
1036+
expect(firstCallBody.input).toEqual([
1037+
{
1038+
role: "user",
1039+
content: [{ type: "input_text", text: "What number did I ask you to remember?" }],
1040+
},
1041+
])
1042+
1043+
// Second SDK call (retry): NO previous_response_id, but FULL conversation history
1044+
const secondCallBody = mockResponsesCreate.mock.calls[1][0]
1045+
expect(secondCallBody.previous_response_id).toBeUndefined()
1046+
expect(secondCallBody.instructions).toBe(systemPrompt)
1047+
// Should include the FULL conversation history to preserve context
1048+
expect(secondCallBody.input).toEqual([
1049+
{
1050+
role: "user",
1051+
content: [{ type: "input_text", text: "Remember the number 42" }],
1052+
},
1053+
{
1054+
role: "assistant",
1055+
content: [{ type: "output_text", text: "I'll remember 42." }],
1056+
},
1057+
{
1058+
role: "user",
1059+
content: [{ type: "input_text", text: "What number did I ask you to remember?" }],
1060+
},
1061+
])
1062+
})
1063+
8621064
it("should only send latest message when using previous_response_id", async () => {
8631065
// Mock fetch for Responses API
8641066
const mockFetch = vitest

src/api/providers/openai-native.ts

Lines changed: 36 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -217,8 +217,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
217217
metadata,
218218
)
219219

220-
// Make the request
221-
yield* this.executeRequest(requestBody, model, metadata)
220+
// Make the request (pass systemPrompt and messages for potential retry)
221+
yield* this.executeRequest(requestBody, model, metadata, systemPrompt, messages)
222222
}
223223

224224
private buildRequestBody(
@@ -297,6 +297,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
297297
requestBody: any,
298298
model: OpenAiNativeModel,
299299
metadata?: ApiHandlerCreateMessageMetadata,
300+
systemPrompt?: string,
301+
messages?: Anthropic.Messages.MessageParam[],
300302
): ApiStream {
301303
try {
302304
// Use the official SDK
@@ -323,13 +325,19 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
323325
if (is400Error && requestBody.previous_response_id && isPreviousResponseError) {
324326
// Log the error and retry without the previous_response_id
325327

326-
// Remove the problematic previous_response_id and retry
327-
const retryRequestBody = { ...requestBody }
328-
delete retryRequestBody.previous_response_id
329-
330328
// Clear the stored lastResponseId to prevent using it again
331329
this.lastResponseId = undefined
332330

331+
// Re-prepare the full conversation without previous_response_id
332+
let retryRequestBody = { ...requestBody }
333+
delete retryRequestBody.previous_response_id
334+
335+
// If we have the original messages, re-prepare the full conversation
336+
if (systemPrompt && messages) {
337+
const { formattedInput } = this.prepareStructuredInput(systemPrompt, messages, undefined)
338+
retryRequestBody.input = formattedInput
339+
}
340+
333341
try {
334342
// Retry with the SDK
335343
const retryStream = (await (this.client as any).responses.create(
@@ -338,7 +346,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
338346

339347
if (typeof (retryStream as any)[Symbol.asyncIterator] !== "function") {
340348
// If SDK fails, fall back to SSE
341-
yield* this.makeGpt5ResponsesAPIRequest(retryRequestBody, model, metadata)
349+
yield* this.makeGpt5ResponsesAPIRequest(
350+
retryRequestBody,
351+
model,
352+
metadata,
353+
systemPrompt,
354+
messages,
355+
)
342356
return
343357
}
344358

@@ -350,13 +364,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
350364
return
351365
} catch (retryErr) {
352366
// If retry also fails, fall back to SSE
353-
yield* this.makeGpt5ResponsesAPIRequest(retryRequestBody, model, metadata)
367+
yield* this.makeGpt5ResponsesAPIRequest(retryRequestBody, model, metadata, systemPrompt, messages)
354368
return
355369
}
356370
}
357371

358372
// For other errors, fallback to manual SSE via fetch
359-
yield* this.makeGpt5ResponsesAPIRequest(requestBody, model, metadata)
373+
yield* this.makeGpt5ResponsesAPIRequest(requestBody, model, metadata, systemPrompt, messages)
360374
}
361375
}
362376

@@ -445,6 +459,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
445459
requestBody: any,
446460
model: OpenAiNativeModel,
447461
metadata?: ApiHandlerCreateMessageMetadata,
462+
systemPrompt?: string,
463+
messages?: Anthropic.Messages.MessageParam[],
448464
): ApiStream {
449465
const apiKey = this.options.openAiNativeApiKey ?? "not-provided"
450466
const baseUrl = this.options.openAiNativeBaseUrl || "https://api.openai.com"
@@ -489,16 +505,22 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
489505
if (response.status === 400 && requestBody.previous_response_id && isPreviousResponseError) {
490506
// Log the error and retry without the previous_response_id
491507

492-
// Remove the problematic previous_response_id and retry
493-
const retryRequestBody = { ...requestBody }
494-
delete retryRequestBody.previous_response_id
495-
496508
// Clear the stored lastResponseId to prevent using it again
497509
this.lastResponseId = undefined
498510
// Resolve the promise once to unblock any waiting requests
499511
this.resolveResponseId(undefined)
500512

501-
// Retry the request without the previous_response_id
513+
// Re-prepare the full conversation without previous_response_id
514+
let retryRequestBody = { ...requestBody }
515+
delete retryRequestBody.previous_response_id
516+
517+
// If we have the original messages, re-prepare the full conversation
518+
if (systemPrompt && messages) {
519+
const { formattedInput } = this.prepareStructuredInput(systemPrompt, messages, undefined)
520+
retryRequestBody.input = formattedInput
521+
}
522+
523+
// Retry the request with full conversation context
502524
const retryResponse = await fetch(url, {
503525
method: "POST",
504526
headers: {

0 commit comments

Comments (0)