diff --git a/assistant/src/__tests__/agent-loop.test.ts b/assistant/src/__tests__/agent-loop.test.ts
index 0d48fb5cb06..5472ce37480 100644
--- a/assistant/src/__tests__/agent-loop.test.ts
+++ b/assistant/src/__tests__/agent-loop.test.ts
@@ -1769,6 +1769,85 @@ describe("AgentLoop", () => {
     expect(messageCompletes).toHaveLength(2);
   });
 
+  // Regression: when the model emits [text, tool_use] in a single turn and then
+  // returns an empty response after the tool result, the loop must NOT nudge —
+  // the model already delivered its reply before the tool call, and nudging
+  // would trick it into re-sending the same text verbatim.
+  test("does not nudge empty response when prior turn had visible text", async () => {
+    const textPlusToolUseResponse: ProviderResponse = {
+      content: [
+        { type: "text", text: "your move, husband." },
+        {
+          type: "tool_use",
+          id: "t1",
+          name: "read_file",
+          input: { path: "/note.txt" },
+        },
+      ],
+      model: "mock-model",
+      usage: { inputTokens: 10, outputTokens: 5 },
+      stopReason: "tool_use",
+    };
+    const emptyResponse: ProviderResponse = {
+      content: [],
+      model: "mock-model",
+      usage: { inputTokens: 10, outputTokens: 0 },
+      stopReason: "end_turn",
+    };
+
+    const { provider, calls } = createMockProvider([
+      textPlusToolUseResponse,
+      emptyResponse,
+    ]);
+
+    const toolExecutor = async () => ({
+      content: "noted",
+      isError: false,
+    });
+
+    const loop = new AgentLoop(
+      provider,
+      "system",
+      {},
+      dummyTools,
+      toolExecutor,
+    );
+    const events: AgentEvent[] = [];
+    const history = await loop.run([userMessage], collectEvents(events));
+
+    // The provider is called exactly twice: first [text + tool_use], then the
+    // empty response. There is no third (retry) call, i.e. no nudge was sent.
+    expect(calls).toHaveLength(2);
+
+    // No user message in the returned history carries the nudge text.
+    const nudgeInHistory = history.some(
+      (m) =>
+        m.role === "user" &&
+        m.content.some(
+          (b) =>
+            b.type === "text" &&
+            "text" in b &&
+            (b as { text: string }).text.includes(
+              "previous response was empty",
+            ),
+        ),
+    );
+    expect(nudgeInHistory).toBe(false);
+
+    // The first assistant message keeps its [text, tool_use] content intact.
+    const firstAssistant = history.find((m) => m.role === "assistant");
+    expect(firstAssistant).toBeDefined();
+    expect(firstAssistant!.content).toEqual([
+      { type: "text", text: "your move, husband." },
+      {
+        type: "tool_use",
+        id: "t1",
+        name: "read_file",
+        input: { path: "/note.txt" },
+      },
+    ]);
+  });
+
   test("gives up after max empty response retries", async () => {
     const emptyResponse: ProviderResponse = {
       content: [],
diff --git a/assistant/src/agent/loop.ts b/assistant/src/agent/loop.ts
index 14e532529fc..e148e4362be 100644
--- a/assistant/src/agent/loop.ts
+++ b/assistant/src/agent/loop.ts
@@ -411,13 +411,34 @@ export class AgentLoop {
         // This can happen when the model fails to produce output after
         // receiving a large tool result. Retry once with a nudge before
         // the message is persisted.
+        //
+        // Only nudge when the model hasn't already delivered text to the user.
+        // Walk history backwards to the most recent assistant message (only
+        // that one is inspected). If it contained visible text (e.g. the model
+        // said its piece before calling a side-effect tool like `remember`),
+        // an empty follow-up is treated as the model correctly ending its turn;
+        // nudging would suggest its text didn't land and invite a re-send.
         const hasVisibleText = response.content.some(
           (block) => block.type === "text" && block.text.trim().length > 0,
         );
+        const priorAssistantHadVisibleText = (() => {
+          for (let i = history.length - 1; i >= 0; i--) {
+            const msg = history[i];
+            if (msg.role !== "assistant") continue;
+            return msg.content.some(
+              (block) =>
+                block.type === "text" &&
+                typeof (block as { text?: unknown }).text === "string" &&
+                (block as { text: string }).text.trim().length > 0,
+            );
+          }
+          return false;
+        })();
         if (
           !hasVisibleText &&
           toolUseBlocks.length === 0 &&
           toolUseTurns > 0 &&
+          !priorAssistantHadVisibleText &&
           emptyResponseRetries < MAX_EMPTY_RESPONSE_RETRIES
         ) {
           emptyResponseRetries++;
@@ -437,7 +458,12 @@ export class AgentLoop {
           continue;
         }
 
-        if (!hasVisibleText && toolUseBlocks.length === 0 && toolUseTurns > 0) {
+        if (
+          !hasVisibleText &&
+          toolUseBlocks.length === 0 &&
+          toolUseTurns > 0 &&
+          !priorAssistantHadVisibleText
+        ) {
           rlog.error(
             { turn: toolUseTurns, retries: emptyResponseRetries },
             "Model returned empty response after tool results — retries exhausted",