Skip to content

Commit fb9c57e

Browse files
fix: filter non-Anthropic content blocks before sending to Vertex API (#9618)
1 parent 87d6463 commit fb9c57e

File tree

7 files changed

+453
-4
lines changed

7 files changed

+453
-4
lines changed

src/api/providers/__tests__/anthropic-vertex.spec.ts

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -601,6 +601,146 @@ describe("VertexHandler", () => {
601601
text: "Second thinking block",
602602
})
603603
})
604+
605+
it("should filter out internal reasoning blocks before sending to API", async () => {
606+
handler = new AnthropicVertexHandler({
607+
apiModelId: "claude-3-5-sonnet-v2@20241022",
608+
vertexProjectId: "test-project",
609+
vertexRegion: "us-central1",
610+
})
611+
612+
const mockCreate = vitest.fn().mockImplementation(async (options) => {
613+
return {
614+
async *[Symbol.asyncIterator]() {
615+
yield {
616+
type: "message_start",
617+
message: {
618+
usage: {
619+
input_tokens: 10,
620+
output_tokens: 0,
621+
},
622+
},
623+
}
624+
yield {
625+
type: "content_block_start",
626+
index: 0,
627+
content_block: {
628+
type: "text",
629+
text: "Response",
630+
},
631+
}
632+
},
633+
}
634+
})
635+
;(handler["client"].messages as any).create = mockCreate
636+
637+
// Messages with internal reasoning blocks (from stored conversation history)
638+
const messagesWithReasoning: Anthropic.Messages.MessageParam[] = [
639+
{
640+
role: "user",
641+
content: "Hello",
642+
},
643+
{
644+
role: "assistant",
645+
content: [
646+
{
647+
type: "reasoning" as any,
648+
text: "This is internal reasoning that should be filtered",
649+
},
650+
{
651+
type: "text",
652+
text: "This is the response",
653+
},
654+
],
655+
},
656+
{
657+
role: "user",
658+
content: "Continue",
659+
},
660+
]
661+
662+
const stream = handler.createMessage(systemPrompt, messagesWithReasoning)
663+
const chunks: ApiStreamChunk[] = []
664+
665+
for await (const chunk of stream) {
666+
chunks.push(chunk)
667+
}
668+
669+
// Verify the API was called with filtered messages (no reasoning blocks)
670+
const calledMessages = mockCreate.mock.calls[0][0].messages
671+
expect(calledMessages).toHaveLength(3)
672+
673+
// Check user message 1
674+
expect(calledMessages[0]).toMatchObject({
675+
role: "user",
676+
})
677+
678+
// Check assistant message - should have reasoning block filtered out
679+
const assistantMessage = calledMessages.find((m: any) => m.role === "assistant")
680+
expect(assistantMessage).toBeDefined()
681+
expect(assistantMessage.content).toEqual([{ type: "text", text: "This is the response" }])
682+
683+
// Verify reasoning blocks were NOT sent to the API
684+
expect(assistantMessage.content).not.toContainEqual(expect.objectContaining({ type: "reasoning" }))
685+
})
686+
687+
it("should filter empty messages after removing all reasoning blocks", async () => {
688+
handler = new AnthropicVertexHandler({
689+
apiModelId: "claude-3-5-sonnet-v2@20241022",
690+
vertexProjectId: "test-project",
691+
vertexRegion: "us-central1",
692+
})
693+
694+
const mockCreate = vitest.fn().mockImplementation(async (options) => {
695+
return {
696+
async *[Symbol.asyncIterator]() {
697+
yield {
698+
type: "message_start",
699+
message: {
700+
usage: {
701+
input_tokens: 10,
702+
output_tokens: 0,
703+
},
704+
},
705+
}
706+
},
707+
}
708+
})
709+
;(handler["client"].messages as any).create = mockCreate
710+
711+
// Message with only reasoning content (should be completely filtered)
712+
const messagesWithOnlyReasoning: Anthropic.Messages.MessageParam[] = [
713+
{
714+
role: "user",
715+
content: "Hello",
716+
},
717+
{
718+
role: "assistant",
719+
content: [
720+
{
721+
type: "reasoning" as any,
722+
text: "Only reasoning, no actual text",
723+
},
724+
],
725+
},
726+
{
727+
role: "user",
728+
content: "Continue",
729+
},
730+
]
731+
732+
const stream = handler.createMessage(systemPrompt, messagesWithOnlyReasoning)
733+
const chunks: ApiStreamChunk[] = []
734+
735+
for await (const chunk of stream) {
736+
chunks.push(chunk)
737+
}
738+
739+
// Verify empty message was filtered out
740+
const calledMessages = mockCreate.mock.calls[0][0].messages
741+
expect(calledMessages).toHaveLength(2) // Only the two user messages
742+
expect(calledMessages.every((m: any) => m.role === "user")).toBe(true)
743+
})
604744
})
605745

606746
describe("completePrompt", () => {

src/api/providers/__tests__/anthropic.spec.ts

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,4 +289,99 @@ describe("AnthropicHandler", () => {
289289
expect(model.info.outputPrice).toBe(22.5)
290290
})
291291
})
292+
293+
describe("reasoning block filtering", () => {
294+
const systemPrompt = "You are a helpful assistant."
295+
296+
it("should filter out internal reasoning blocks before sending to API", async () => {
297+
handler = new AnthropicHandler({
298+
apiKey: "test-api-key",
299+
apiModelId: "claude-3-5-sonnet-20241022",
300+
})
301+
302+
// Messages with internal reasoning blocks (from stored conversation history)
303+
const messagesWithReasoning: Anthropic.Messages.MessageParam[] = [
304+
{
305+
role: "user",
306+
content: "Hello",
307+
},
308+
{
309+
role: "assistant",
310+
content: [
311+
{
312+
type: "reasoning" as any,
313+
text: "This is internal reasoning that should be filtered",
314+
},
315+
{
316+
type: "text",
317+
text: "This is the response",
318+
},
319+
],
320+
},
321+
{
322+
role: "user",
323+
content: "Continue",
324+
},
325+
]
326+
327+
const stream = handler.createMessage(systemPrompt, messagesWithReasoning)
328+
const chunks: any[] = []
329+
330+
for await (const chunk of stream) {
331+
chunks.push(chunk)
332+
}
333+
334+
// Verify the API was called with filtered messages (no reasoning blocks)
335+
const calledMessages = mockCreate.mock.calls[mockCreate.mock.calls.length - 1][0].messages
336+
expect(calledMessages).toHaveLength(3)
337+
338+
// Check assistant message - should have reasoning block filtered out
339+
const assistantMessage = calledMessages.find((m: any) => m.role === "assistant")
340+
expect(assistantMessage).toBeDefined()
341+
expect(assistantMessage.content).toEqual([{ type: "text", text: "This is the response" }])
342+
343+
// Verify reasoning blocks were NOT sent to the API
344+
expect(assistantMessage.content).not.toContainEqual(expect.objectContaining({ type: "reasoning" }))
345+
})
346+
347+
it("should filter empty messages after removing all reasoning blocks", async () => {
348+
handler = new AnthropicHandler({
349+
apiKey: "test-api-key",
350+
apiModelId: "claude-3-5-sonnet-20241022",
351+
})
352+
353+
// Message with only reasoning content (should be completely filtered)
354+
const messagesWithOnlyReasoning: Anthropic.Messages.MessageParam[] = [
355+
{
356+
role: "user",
357+
content: "Hello",
358+
},
359+
{
360+
role: "assistant",
361+
content: [
362+
{
363+
type: "reasoning" as any,
364+
text: "Only reasoning, no actual text",
365+
},
366+
],
367+
},
368+
{
369+
role: "user",
370+
content: "Continue",
371+
},
372+
]
373+
374+
const stream = handler.createMessage(systemPrompt, messagesWithOnlyReasoning)
375+
const chunks: any[] = []
376+
377+
for await (const chunk of stream) {
378+
chunks.push(chunk)
379+
}
380+
381+
// Verify empty message was filtered out
382+
const calledMessages = mockCreate.mock.calls[mockCreate.mock.calls.length - 1][0].messages
383+
expect(calledMessages.length).toBe(2) // Only the two user messages
384+
expect(calledMessages.every((m: any) => m.role === "user")).toBe(true)
385+
})
386+
})
292387
})

src/api/providers/anthropic-vertex.ts

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import { safeJsonParse } from "../../shared/safeJsonParse"
1616
import { ApiStream } from "../transform/stream"
1717
import { addCacheBreakpoints } from "../transform/caching/vertex"
1818
import { getModelParams } from "../transform/model-params"
19+
import { filterNonAnthropicBlocks } from "../transform/anthropic-filter"
1920

2021
import { BaseProvider } from "./base-provider"
2122
import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
@@ -70,6 +71,9 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
7071
reasoning: thinking,
7172
} = this.getModel()
7273

74+
// Filter out non-Anthropic blocks (reasoning, thoughtSignature, etc.) before sending to the API
75+
const sanitizedMessages = filterNonAnthropicBlocks(messages)
76+
7377
/**
7478
* Vertex API has specific limitations for prompt caching:
7579
* 1. Maximum of 4 blocks can have cache_control
@@ -92,7 +96,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
9296
system: supportsPromptCache
9397
? [{ text: systemPrompt, type: "text" as const, cache_control: { type: "ephemeral" } }]
9498
: systemPrompt,
95-
messages: supportsPromptCache ? addCacheBreakpoints(messages) : messages,
99+
messages: supportsPromptCache ? addCacheBreakpoints(sanitizedMessages) : sanitizedMessages,
96100
stream: true,
97101
}
98102

@@ -158,6 +162,12 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
158162

159163
break
160164
}
165+
case "content_block_stop": {
166+
// Block complete - no action needed for now.
167+
// Note: Signature for multi-turn thinking would require using stream.finalMessage()
168+
// after iteration completes, which requires restructuring the streaming approach.
169+
break
170+
}
161171
}
162172
}
163173
}

src/api/providers/anthropic.ts

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import type { ApiHandlerOptions } from "../../shared/api"
1414

1515
import { ApiStream } from "../transform/stream"
1616
import { getModelParams } from "../transform/model-params"
17+
import { filterNonAnthropicBlocks } from "../transform/anthropic-filter"
1718

1819
import { BaseProvider } from "./base-provider"
1920
import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
@@ -45,6 +46,9 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
4546
const cacheControl: CacheControlEphemeral = { type: "ephemeral" }
4647
let { id: modelId, betas = [], maxTokens, temperature, reasoning: thinking } = this.getModel()
4748

49+
// Filter out non-Anthropic blocks (reasoning, thoughtSignature, etc.) before sending to the API
50+
const sanitizedMessages = filterNonAnthropicBlocks(messages)
51+
4852
// Add 1M context beta flag if enabled for Claude Sonnet 4 and 4.5
4953
if (
5054
(modelId === "claude-sonnet-4-20250514" || modelId === "claude-sonnet-4-5") &&
@@ -75,7 +79,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
7579
* know the last message to retrieve from the cache for the
7680
* current request.
7781
*/
78-
const userMsgIndices = messages.reduce(
82+
const userMsgIndices = sanitizedMessages.reduce(
7983
(acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc),
8084
[] as number[],
8185
)
@@ -91,7 +95,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
9195
thinking,
9296
// Setting cache breakpoint for system prompt so new tasks can reuse it.
9397
system: [{ text: systemPrompt, type: "text", cache_control: cacheControl }],
94-
messages: messages.map((message, index) => {
98+
messages: sanitizedMessages.map((message, index) => {
9599
if (index === lastUserMsgIndex || index === secondLastMsgUserIndex) {
96100
return {
97101
...message,
@@ -142,7 +146,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
142146
max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
143147
temperature,
144148
system: [{ text: systemPrompt, type: "text" }],
145-
messages,
149+
messages: sanitizedMessages,
146150
stream: true,
147151
})) as any
148152
break
@@ -227,6 +231,9 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
227231

228232
break
229233
case "content_block_stop":
234+
// Block complete - no action needed for now.
235+
// Note: Signature for multi-turn thinking would require using stream.finalMessage()
236+
// after iteration completes, which requires restructuring the streaming approach.
230237
break
231238
}
232239
}

src/api/providers/gemini.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
193193
}
194194

195195
const params: GenerateContentParameters = { model, contents, config }
196+
196197
try {
197198
const result = await this.client.models.generateContentStream(params)
198199

0 commit comments

Comments (0)