Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
241 changes: 241 additions & 0 deletions assistant/src/__tests__/openai-responses-provider.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,16 @@ function functionCallArgsDoneEvent(
};
}

/**
 * Builds a fake `response.output_item.added` stream event whose item is a
 * `web_search_call` with the given id.
 *
 * @param itemId - Identifier placed on the web search call item.
 * @returns The fake stream event to feed into the mocked stream.
 */
function webSearchCallAddedEvent(itemId: string): FakeStreamEvent {
  const addedEvent: FakeStreamEvent = {
    type: "response.output_item.added",
    item: { type: "web_search_call", id: itemId },
  };
  return addedEvent;
}

function completedEvent(
inputTokens: number,
outputTokens: number,
Expand Down Expand Up @@ -1242,4 +1252,235 @@ describe("OpenAIResponsesProvider — Native Web Search", () => {
strict: null,
});
});

// -----------------------------------------------------------------------
// web_search_call stream event handling
// -----------------------------------------------------------------------

// One web_search_call item in the stream must yield exactly one
// server_tool_start event that carries the item id as toolUseId.
test("emits server_tool_start when web_search_call output item is added", async () => {
  const provider = new OpenAIResponsesProvider("sk-test", "gpt-5.2", {
    useNativeWebSearch: true,
  });
  fakeStreamEvents = [
    webSearchCallAddedEvent("ws_call_1"),
    textDeltaEvent("Search results here."),
    completedEvent(50, 30),
  ];

  const captured: ProviderEvent[] = [];
  const userMessage = {
    role: "user",
    content: [{ type: "text", text: "Search for cats" }],
  } as const;
  await provider.sendMessage([userMessage], [webSearchTool], undefined, {
    onEvent: (event) => captured.push(event),
  });

  // Asserting on the whole filtered array checks both count and payload.
  const started = captured.filter(
    (event) => event.type === "server_tool_start",
  );
  expect(started).toEqual([
    {
      type: "server_tool_start",
      name: "web_search",
      toolUseId: "ws_call_1",
      input: {},
    },
  ]);
});

// A tracked web search call must be reported as completed (non-error) once
// the stream delivers its completed event.
test("emits server_tool_complete on response.completed for tracked web search calls", async () => {
  const provider = new OpenAIResponsesProvider("sk-test", "gpt-5.2", {
    useNativeWebSearch: true,
  });
  fakeStreamEvents = [
    webSearchCallAddedEvent("ws_call_1"),
    textDeltaEvent("Answer with citations."),
    completedEvent(50, 30),
  ];

  const captured: ProviderEvent[] = [];
  const userMessage = {
    role: "user",
    content: [{ type: "text", text: "Search for dogs" }],
  } as const;
  await provider.sendMessage([userMessage], [webSearchTool], undefined, {
    onEvent: (event) => captured.push(event),
  });

  // Asserting on the whole filtered array checks both count and payload.
  const completed = captured.filter(
    (event) => event.type === "server_tool_complete",
  );
  expect(completed).toEqual([
    {
      type: "server_tool_complete",
      toolUseId: "ws_call_1",
      isError: false,
    },
  ]);
});

// Two web search calls in one stream: each must get its own start and
// complete event, in the order the calls were added.
test("emits server_tool_complete for multiple web search calls", async () => {
  const provider = new OpenAIResponsesProvider("sk-test", "gpt-5.2", {
    useNativeWebSearch: true,
  });
  fakeStreamEvents = [
    webSearchCallAddedEvent("ws_call_1"),
    webSearchCallAddedEvent("ws_call_2"),
    textDeltaEvent("Combined results."),
    completedEvent(80, 50),
  ];

  const captured: ProviderEvent[] = [];
  const userMessage = {
    role: "user",
    content: [{ type: "text", text: "Search multiple" }],
  } as const;
  await provider.sendMessage([userMessage], [webSearchTool], undefined, {
    onEvent: (event) => captured.push(event),
  });

  // Expected events are derived from the ids so order and count are
  // checked in a single comparison per event kind.
  const callIds = ["ws_call_1", "ws_call_2"];

  const started = captured.filter(
    (event) => event.type === "server_tool_start",
  );
  expect(started).toEqual(
    callIds.map((toolUseId) => ({
      type: "server_tool_start",
      name: "web_search",
      toolUseId,
      input: {},
    })),
  );

  const completed = captured.filter(
    (event) => event.type === "server_tool_complete",
  );
  expect(completed).toEqual(
    callIds.map((toolUseId) => ({
      type: "server_tool_complete",
      toolUseId,
      isError: false,
    })),
  );
});

// A plain function call stream (no web_search_call item) must not produce
// any server_tool_start / server_tool_complete events.
test("does not emit server_tool events for non-web-search output items", async () => {
  const provider = new OpenAIResponsesProvider("sk-test", "gpt-5.2", {
    useNativeWebSearch: true,
  });
  fakeStreamEvents = [
    functionCallAddedEvent("call_1", "file_read"),
    functionCallArgsDeltaEvent('{"path":"/tmp/a"}', "call_1"),
    functionCallArgsDoneEvent("call_1", "file_read", '{"path":"/tmp/a"}'),
    completedEvent(20, 10),
  ];

  const captured: ProviderEvent[] = [];
  const userMessage = {
    role: "user",
    content: [{ type: "text", text: "Read file" }],
  } as const;
  await provider.sendMessage([userMessage], [fileReadTool], undefined, {
    onEvent: (event) => captured.push(event),
  });

  const sawServerToolEvent = captured.some(
    (event) =>
      event.type === "server_tool_start" ||
      event.type === "server_tool_complete",
  );
  expect(sawServerToolEvent).toBe(false);
});

// -----------------------------------------------------------------------
// server_tool_use content blocks in ProviderResponse
// -----------------------------------------------------------------------

// The returned content must list the server_tool_use block for the web
// search call first, followed by the streamed text block.
test("includes server_tool_use content blocks in response for web search calls", async () => {
  const provider = new OpenAIResponsesProvider("sk-test", "gpt-5.2", {
    useNativeWebSearch: true,
  });
  fakeStreamEvents = [
    webSearchCallAddedEvent("ws_call_1"),
    textDeltaEvent("Here are the results."),
    completedEvent(50, 30),
  ];

  const userMessage = {
    role: "user",
    content: [{ type: "text", text: "Search for cats" }],
  } as const;
  const response = await provider.sendMessage([userMessage], [webSearchTool]);

  // Whole-array equality pins both the block count and their ordering.
  expect(response.content).toEqual([
    {
      type: "server_tool_use",
      id: "ws_call_1",
      name: "web_search",
      input: {},
    },
    {
      type: "text",
      text: "Here are the results.",
    },
  ]);
});

// With two web search calls, the returned content must hold one
// server_tool_use block per call (in call order) and then the text block.
test("includes multiple server_tool_use blocks for multiple web search calls", async () => {
  const provider = new OpenAIResponsesProvider("sk-test", "gpt-5.2", {
    useNativeWebSearch: true,
  });
  fakeStreamEvents = [
    webSearchCallAddedEvent("ws_call_1"),
    webSearchCallAddedEvent("ws_call_2"),
    textDeltaEvent("Combined search results."),
    completedEvent(80, 50),
  ];

  const userMessage = {
    role: "user",
    content: [{ type: "text", text: "Search two things" }],
  } as const;
  const response = await provider.sendMessage([userMessage], [webSearchTool]);

  const callIds = ["ws_call_1", "ws_call_2"];
  const expectedToolBlocks = callIds.map((id) => ({
    type: "server_tool_use",
    id,
    name: "web_search",
    input: {},
  }));

  // Whole-array equality pins both the block count and their ordering.
  expect(response.content).toEqual([
    ...expectedToolBlocks,
    {
      type: "text",
      text: "Combined search results.",
    },
  ]);
});

// When the stream contains no web_search_call item, the returned content
// must consist of the text block only.
test("does not include server_tool_use blocks when no web search calls occur", async () => {
  const provider = new OpenAIResponsesProvider("sk-test", "gpt-5.2", {
    useNativeWebSearch: true,
  });
  fakeStreamEvents = [
    textDeltaEvent("No search needed."),
    completedEvent(10, 5),
  ];

  const userMessage = {
    role: "user",
    content: [{ type: "text", text: "Hello" }],
  } as const;
  const response = await provider.sendMessage([userMessage], [webSearchTool]);

  expect(response.content).toEqual([
    {
      type: "text",
      text: "No search needed.",
    },
  ]);
});
});
12 changes: 11 additions & 1 deletion assistant/src/providers/openai/responses-provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -298,8 +298,18 @@ export class OpenAIResponsesProvider implements Provider {
cleanupTimeout();
}

// Build content blocks
// Build content blocks.
// Inject server_tool_use blocks before text so conversation history
// matches the shape Anthropic produces for native web search.
const content: ContentBlock[] = [];
for (const toolUseId of webSearchCallIds) {
content.push({
type: "server_tool_use",
id: toolUseId,
name: "web_search",
input: {},
});
Comment on lines +305 to +311

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Emit paired web-search result with server tool use

This now persists server_tool_use blocks for OpenAI web-search calls but never emits a matching web_search_tool_result block, which means every such message is treated as orphaned server-tool output by repairHistory and gets a synthetic web_search_tool_result_error injected on subsequent turns (via the pre-run repair path in conversation-agent-loop.ts). In practice, successful native web searches are rewritten as synthetic errors in runtime history, adding noisy repairs and polluting follow-up context; either emit a paired result block or avoid adding server_tool_use until a pair can be represented.

Useful? React with 👍 / 👎.

}
Comment on lines +305 to +312

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 Missing web_search_tool_result blocks causes history repair to inject synthetic error results on every subsequent turn

The new code injects server_tool_use content blocks into the response (lines 305-311) but never produces the paired web_search_tool_result content blocks that the rest of the codebase expects. The repairHistory function (assistant/src/daemon/history-repair.ts:71-113) runs before every provider API call (assistant/src/daemon/conversation-agent-loop.ts:1023) and explicitly checks that every server_tool_use in an assistant message has a matching web_search_tool_result in the same message. When the pairing is missing, it injects a synthetic error result with {type: "web_search_tool_result_error", error_code: "unavailable"}, logs a warning, and increments stats.missingToolResultsInserted. This means every successful native web search through OpenAI will be retroactively marked as failed in the conversation history on the very next turn. The Anthropic provider produces both block types (see assistant/src/providers/anthropic/client.ts:1386-1393), and the comment on line 302 even says the goal is to match the Anthropic shape — but the pairing is incomplete.

Expected fix pattern

After each server_tool_use block, also push a web_search_tool_result block:

for (const toolUseId of webSearchCallIds) {
  content.push({
    type: "server_tool_use",
    id: toolUseId,
    name: "web_search",
    input: {},
  });
  content.push({
    type: "web_search_tool_result",
    tool_use_id: toolUseId,
    content: [],
  });
}
Suggested change
for (const toolUseId of webSearchCallIds) {
content.push({
type: "server_tool_use",
id: toolUseId,
name: "web_search",
input: {},
});
}
for (const toolUseId of webSearchCallIds) {
content.push({
type: "server_tool_use",
id: toolUseId,
name: "web_search",
input: {},
});
content.push({
type: "web_search_tool_result",
tool_use_id: toolUseId,
content: [],
});
}
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

if (contentText) {
content.push({ type: "text", text: contentText });
}
Expand Down
Loading