Skip to content

Commit e618d88

Browse files
feat: enable native tool calling for gemini provider (#9343)
Co-authored-by: daniel-lxs <[email protected]>
1 parent ee19904 commit e618d88

File tree

4 files changed

+313
-125
lines changed

packages/types/src/providers/gemini.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ export const geminiModels = {
 		maxTokens: 65_536,
 		contextWindow: 1_048_576,
 		supportsImages: true,
+		supportsNativeTools: true,
 		supportsPromptCache: true,
 		supportsReasoningEffort: ["low", "high"],
 		reasoningEffort: "low",
@@ -35,6 +36,7 @@ export const geminiModels = {
 		maxTokens: 64_000,
 		contextWindow: 1_048_576,
 		supportsImages: true,
+		supportsNativeTools: true,
 		supportsPromptCache: true,
 		inputPrice: 2.5, // This is the pricing for prompts above 200k tokens.
 		outputPrice: 15,
@@ -62,6 +64,7 @@ export const geminiModels = {
 		maxTokens: 65_535,
 		contextWindow: 1_048_576,
 		supportsImages: true,
+		supportsNativeTools: true,
 		supportsPromptCache: true,
 		inputPrice: 2.5, // This is the pricing for prompts above 200k tokens.
 		outputPrice: 15,
@@ -88,6 +91,7 @@ export const geminiModels = {
 		maxTokens: 65_535,
 		contextWindow: 1_048_576,
 		supportsImages: true,
+		supportsNativeTools: true,
 		supportsPromptCache: true,
 		inputPrice: 2.5, // This is the pricing for prompts above 200k tokens.
 		outputPrice: 15,
@@ -112,6 +116,7 @@ export const geminiModels = {
 		maxTokens: 65_535,
 		contextWindow: 1_048_576,
 		supportsImages: true,
+		supportsNativeTools: true,
 		supportsPromptCache: true,
 		inputPrice: 2.5, // This is the pricing for prompts above 200k tokens.
 		outputPrice: 15,
@@ -140,6 +145,7 @@ export const geminiModels = {
 		maxTokens: 65_536,
 		contextWindow: 1_048_576,
 		supportsImages: true,
+		supportsNativeTools: true,
 		supportsPromptCache: true,
 		inputPrice: 0.3,
 		outputPrice: 2.5,
@@ -152,6 +158,7 @@ export const geminiModels = {
 		maxTokens: 65_536,
 		contextWindow: 1_048_576,
 		supportsImages: true,
+		supportsNativeTools: true,
 		supportsPromptCache: true,
 		inputPrice: 0.3,
 		outputPrice: 2.5,
@@ -164,6 +171,7 @@ export const geminiModels = {
 		maxTokens: 64_000,
 		contextWindow: 1_048_576,
 		supportsImages: true,
+		supportsNativeTools: true,
 		supportsPromptCache: true,
 		inputPrice: 0.3,
 		outputPrice: 2.5,
@@ -178,6 +186,7 @@ export const geminiModels = {
 		maxTokens: 65_536,
 		contextWindow: 1_048_576,
 		supportsImages: true,
+		supportsNativeTools: true,
 		supportsPromptCache: true,
 		inputPrice: 0.1,
 		outputPrice: 0.4,
@@ -190,6 +199,7 @@ export const geminiModels = {
 		maxTokens: 65_536,
 		contextWindow: 1_048_576,
 		supportsImages: true,
+		supportsNativeTools: true,
 		supportsPromptCache: true,
 		inputPrice: 0.1,
 		outputPrice: 0.4,

src/api/providers/gemini.ts

Lines changed: 79 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import {
 	type GenerateContentParameters,
 	type GenerateContentConfig,
 	type GroundingMetadata,
+	FunctionCallingConfigMode,
+	Content,
 } from "@google/genai"
 import type { JWTInput } from "google-auth-library"
 
@@ -101,17 +103,46 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 			return true
 		})
 
-		const contents = geminiMessages.map((message) =>
-			convertAnthropicMessageToGemini(message, { includeThoughtSignatures }),
-		)
+		// Build a map of tool IDs to names from previous messages
+		// This is needed because Anthropic's tool_result blocks only contain the ID,
+		// but Gemini requires the name in functionResponse
+		const toolIdToName = new Map<string, string>()
+		for (const message of messages) {
+			if (Array.isArray(message.content)) {
+				for (const block of message.content) {
+					if (block.type === "tool_use") {
+						toolIdToName.set(block.id, block.name)
+					}
+				}
+			}
+		}
+
+		const contents = geminiMessages
+			.map((message) => convertAnthropicMessageToGemini(message, { includeThoughtSignatures, toolIdToName }))
+			.flat()
 
 		const tools: GenerateContentConfig["tools"] = []
-		if (this.options.enableUrlContext) {
-			tools.push({ urlContext: {} })
-		}
 
-		if (this.options.enableGrounding) {
-			tools.push({ googleSearch: {} })
+		// Google built-in tools (Grounding, URL Context) are currently mutually exclusive
+		// with function declarations in the Gemini API. If native function calling is
+		// used (Agent tools), we must prioritize it and skip built-in tools to avoid
+		// "Tool use with function calling is unsupported" (HTTP 400) errors.
+		if (metadata?.tools && metadata.tools.length > 0) {
+			tools.push({
+				functionDeclarations: metadata.tools.map((tool) => ({
+					name: (tool as any).function.name,
+					description: (tool as any).function.description,
+					parametersJsonSchema: (tool as any).function.parameters,
+				})),
+			})
+		} else {
+			if (this.options.enableUrlContext) {
+				tools.push({ urlContext: {} })
+			}
+
+			if (this.options.enableGrounding) {
+				tools.push({ googleSearch: {} })
+			}
 		}
 
 		// Determine temperature respecting model capabilities and defaults:
@@ -133,6 +164,34 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 			...(tools.length > 0 ? { tools } : {}),
 		}
 
+		if (metadata?.tool_choice) {
+			const choice = metadata.tool_choice
+			let mode: FunctionCallingConfigMode
+			let allowedFunctionNames: string[] | undefined
+
+			if (choice === "auto") {
+				mode = FunctionCallingConfigMode.AUTO
+			} else if (choice === "none") {
+				mode = FunctionCallingConfigMode.NONE
+			} else if (choice === "required") {
+				// "required" means the model must call at least one tool; Gemini uses ANY for this.
+				mode = FunctionCallingConfigMode.ANY
+			} else if (typeof choice === "object" && "function" in choice && choice.type === "function") {
+				mode = FunctionCallingConfigMode.ANY
+				allowedFunctionNames = [choice.function.name]
+			} else {
+				// Fall back to AUTO for unknown values to avoid unintentionally broadening tool access.
+				mode = FunctionCallingConfigMode.AUTO
+			}
+
+			config.toolConfig = {
+				functionCallingConfig: {
+					mode,
+					...(allowedFunctionNames ? { allowedFunctionNames } : {}),
+				},
+			}
+		}
+
 		const params: GenerateContentParameters = { model, contents, config }
 		try {
 			const result = await this.client.models.generateContentStream(params)
@@ -141,6 +200,8 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 			let pendingGroundingMetadata: GroundingMetadata | undefined
 			let finalResponse: { responseId?: string } | undefined
 
+			let toolCallCounter = 0
+
 			for await (const chunk of result) {
 				// Track the final structured response (per SDK pattern: candidate.finishReason)
 				if (chunk.candidates && chunk.candidates[0]?.finishReason) {
@@ -159,6 +220,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 					thought?: boolean
 					text?: string
 					thoughtSignature?: string
+					functionCall?: { name: string; args: Record<string, unknown> }
 				}>) {
 					// Capture thought signatures so they can be persisted into API history.
 					const thoughtSignature = part.thoughtSignature
@@ -173,6 +235,14 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 						if (part.text) {
 							yield { type: "reasoning", text: part.text }
 						}
+					} else if (part.functionCall) {
+						const callId = `${part.functionCall.name}-${toolCallCounter++}`
+						yield {
+							type: "tool_call",
+							id: callId,
+							name: part.functionCall.name,
+							arguments: JSON.stringify(part.functionCall.args),
+						}
 					} else {
 						// This is regular content
 						if (part.text) {
@@ -350,12 +420,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 		const countTokensRequest = {
 			model,
 			// Token counting does not need encrypted continuation; always drop thoughtSignature.
-			contents: [
-				{
-					role: "user",
-					parts: convertAnthropicContentToGemini(content, { includeThoughtSignatures: false }),
-				},
-			],
+			contents: convertAnthropicContentToGemini(content, { includeThoughtSignatures: false }),
 		}
 
 		const response = await this.client.models.countTokens(countTokensRequest)

0 commit comments

Comments (0)