Skip to content

Commit 236092d

Browse files
added google search with grounding for Gemini 2 (#109)
1 parent dede7eb commit 236092d

5 files changed

+369
-4
lines changed

README.md

+75
Original file line numberDiff line numberDiff line change
@@ -1247,6 +1247,81 @@ credits that you can put towards Gemini.
12471247
print("Could not create Gemini grounding search request: \(error.localizedDescription)")
12481248
}
12491249

1250+
### How to make a Google Search grounding call with Gemini 2.0
1251+
1252+
It's important that you connect a GCP billing account to your Gemini API key to use this
1253+
feature. Otherwise, Gemini will return 429s for every call. You can connect your billing
1254+
account for the API keys you use [here](https://aistudio.google.com/app/apikey).
1255+
1256+
Consider applying to [Google for Startups](https://cloud.google.com/startup?hl=en) to gain
1257+
credits that you can put towards Gemini.
1258+
1259+
```swift
1260+
import AIProxy
1261+
1262+
/* Uncomment for BYOK use cases */
1263+
// let geminiService = AIProxy.geminiDirectService(
1264+
// unprotectedAPIKey: "your-gemini-key"
1265+
// )
1266+
1267+
/* Uncomment for all other production use cases */
1268+
// let geminiService = AIProxy.geminiService(
1269+
// partialKey: "partial-key-from-your-developer-dashboard",
1270+
// serviceURL: "service-url-from-your-developer-dashboard"
1271+
// )
1272+
1273+
let requestBody = GeminiGenerateContentRequestBody(
1274+
contents: [
1275+
.init(
1276+
parts: [.text("What is the price of Google stock today")],
1277+
role: "user"
1278+
)
1279+
],
1280+
generationConfig: .init(
1281+
temperature: 0.7
1282+
),
1283+
systemInstruction: .init(
1284+
parts: [.text("You are a helpful assistant")]
1285+
),
1286+
tools: [
1287+
.googleSearch(.init())
1288+
]
1289+
)
1290+
do {
1291+
let response = try await geminiService.generateContentRequest(
1292+
body: requestBody,
1293+
model: "gemini-2.0-flash"
1294+
)
1295+
for candidate in response.candidates ?? [] {
1296+
for part in candidate.content?.parts ?? [] {
1297+
switch part {
1298+
case .text(let text):
1299+
print("Gemini sent: \(text)\n")
1300+
print("Gemini used \(candidate.groundingMetadata?.groundingChunks?.count ?? 0) grounding chunks")
1301+
print("Gemini used \(candidate.groundingMetadata?.groundingSupports?.count ?? 0) grounding supports")
1302+
case .functionCall(name: let functionName, args: let arguments):
1303+
print("Gemini wants us to call function \(functionName) with arguments: \(arguments ?? [:])")
1304+
}
1305+
}
1306+
}
1307+
if let usage = response.usageMetadata {
1308+
print(
1309+
"""
1310+
Used:
1311+
\(usage.promptTokenCount ?? 0) prompt tokens
1312+
\(usage.cachedContentTokenCount ?? 0) cached tokens
1313+
\(usage.candidatesTokenCount ?? 0) candidate tokens
1314+
\(usage.totalTokenCount ?? 0) total tokens
1315+
"""
1316+
)
1317+
}
1318+
} catch AIProxyError.unsuccessfulRequest(let statusCode, let responseBody) {
1319+
print("Received \(statusCode) status code with response body: \(responseBody)")
1320+
} catch {
1321+
print("Could not create Gemini google search grounding request: \(error.localizedDescription)")
1322+
}
1323+
```
1324+
12501325

12511326
### How to transcribe audio with Gemini
12521327

Sources/AIProxy/Gemini/GeminiGenerateContentRequestBody.swift

+33-4
Original file line numberDiff line numberDiff line change
@@ -151,11 +151,15 @@ extension GeminiGenerateContentRequestBody {
151151
/// The model or system does not execute the function. Instead the defined function may be returned as a FunctionCall with arguments to the client side for execution. The model may decide to call a subset of these functions by populating FunctionCall in the response. The next conversation turn may contain a FunctionResponse with the Content.role "function" generation context for the next model turn.
152152
case functionDeclarations([FunctionDeclaration])
153153

154-
/// Retrieval tool that is powered by Google search.
154+
/// Retrieval tool that is powered by Google Search (dynamic retrieval for Gemini 1.5).
155155
case googleSearchRetrieval(DynamicRetrievalConfig)
156+
157+
/// Google Search tool for Gemini 2.0
158+
case googleSearch(GoogleSearch)
156159

157160
private enum RootKey: CodingKey {
158161
case functionDeclarations
162+
case googleSearch
159163
case googleSearchRetrieval
160164
}
161165

@@ -164,13 +168,26 @@ extension GeminiGenerateContentRequestBody {
164168
switch self {
165169
case .functionDeclarations(let functionDeclarations):
166170
try container.encode(functionDeclarations, forKey: .functionDeclarations)
167-
case .googleSearchRetrieval(let dynamicRetrievalConfig):
168-
try container.encode(dynamicRetrievalConfig, forKey: .googleSearchRetrieval)
171+
case .googleSearchRetrieval(let config):
172+
try container.encode(config, forKey: .googleSearchRetrieval)
173+
case .googleSearch(let config):
174+
try container.encode(config, forKey: .googleSearch)
169175
}
170176
}
171177
}
172178
}
173179

180+
extension GeminiGenerateContentRequestBody {
181+
/// Represents the Google Search tool for Gemini 2.0.
182+
/// No configuration options are needed for the basic implementation, so it encodes as an empty JSON object.
183+
public struct GoogleSearch: Encodable {
184+
// Explicit public initializer; the compiler-synthesized one would be internal to the module.
185+
public init() {
186+
// No initialization needed
187+
}
188+
}
189+
}
190+
174191
// MARK: - RequestBody.Tool.DynamicRetrievalConfig
175192
extension GeminiGenerateContentRequestBody.Tool {
176193
/// Describes the options to customize dynamic retrieval.
@@ -374,21 +391,30 @@ extension GeminiGenerateContentRequestBody {
374391
public let topK: Int?
375392
public let presencePenalty: Double?
376393
public let frequencyPenalty: Double?
394+
public let responseModalities: [String]?
395+
public let responseMimeType: String?
396+
public let responseSchema: [String: AIProxyJSONValue]?
377397

378398
public init(
379399
maxOutputTokens: Int? = nil,
380400
temperature: Double? = nil,
381401
topP: Double? = nil,
382402
topK: Int? = nil,
383403
presencePenalty: Double? = nil,
384-
frequencyPenalty: Double? = nil
404+
frequencyPenalty: Double? = nil,
405+
responseModalities: [String]? = nil,
406+
responseMimeType: String? = nil,
407+
responseSchema: [String: AIProxyJSONValue]? = nil
385408
) {
386409
self.maxOutputTokens = maxOutputTokens
387410
self.temperature = temperature
388411
self.topP = topP
389412
self.topK = topK
390413
self.presencePenalty = presencePenalty
391414
self.frequencyPenalty = frequencyPenalty
415+
self.responseModalities = responseModalities
416+
self.responseMimeType = responseMimeType
417+
self.responseSchema = responseSchema
392418
}
393419

394420
private enum CodingKeys: String, CodingKey {
@@ -398,6 +424,9 @@ extension GeminiGenerateContentRequestBody {
398424
case topK = "top_k"
399425
case presencePenalty = "presence_penalty"
400426
case frequencyPenalty = "frequency_penalty"
427+
case responseModalities = "response_modalities"
428+
case responseMimeType = "response_mime_type"
429+
case responseSchema = "response_schema"
401430
}
402431
}
403432
}

Sources/AIProxy/Gemini/GeminiGenerateContentResponseBody.swift

+46
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ extension GeminiGenerateContentResponseBody {
2929
/// If empty, the model has not stopped generating tokens.
3030
public let finishReason: String?
3131

32+
/// Grounding metadata for the candidate.
33+
public let groundingMetadata: GroundingMetadata?
34+
3235
/// Index of the candidate in the list of response candidates.
3336
public let index: Int?
3437

@@ -101,6 +104,49 @@ extension GeminiGenerateContentResponseBody.Candidate {
101104
}
102105
}
103106

107+
// MARK: - ResponseBody.Candidate.GroundingMetadata
108+
extension GeminiGenerateContentResponseBody.Candidate {
109+
/// Grounding metadata containing information about search results used for the response
110+
public struct GroundingMetadata: Decodable {
111+
public let searchEntryPoint: SearchEntryPoint?
112+
public let groundingChunks: [GroundingChunk]?
113+
public let groundingSupports: [GroundingSupport]?
114+
public let webSearchQueries: [String]?
115+
116+
private enum CodingKeys: String, CodingKey {
117+
case searchEntryPoint
118+
case groundingChunks
119+
case groundingSupports
120+
case webSearchQueries
121+
}
122+
}
123+
124+
public struct SearchEntryPoint: Decodable {
125+
public let renderedContent: String?
126+
}
127+
128+
public struct GroundingChunk: Decodable {
129+
public let web: WebInfo?
130+
}
131+
132+
public struct WebInfo: Decodable {
133+
public let uri: String?
134+
public let title: String?
135+
}
136+
137+
public struct GroundingSupport: Decodable {
138+
public let segment: Segment?
139+
public let groundingChunkIndices: [Int]?
140+
public let confidenceScores: [Double]?
141+
}
142+
143+
public struct Segment: Decodable {
144+
public let startIndex: Int?
145+
public let endIndex: Int?
146+
public let text: String?
147+
}
148+
}
149+
104150
// MARK: - ResponseBody.UsageMetadata
105151
extension GeminiGenerateContentResponseBody {
106152
/// Metadata on the generation request's token usage.

Tests/AIProxyTests/GeminiGenerateContentRequestTests.swift

+37
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,43 @@ final class GeminiGenerateContentRequestTests: XCTestCase {
5252
try requestBody.serialize(pretty: true)
5353
)
5454
}
55+
56+
func testGroundingRequestWithGoogleSearchIsEncodableToJson() throws {
57+
let requestBody = GeminiGenerateContentRequestBody(
58+
contents: [
59+
.init(
60+
parts: [.text("What is the price of Google stock today")],
61+
role: "user"
62+
)
63+
],
64+
tools: [
65+
.googleSearch(GeminiGenerateContentRequestBody.GoogleSearch())
66+
]
67+
)
68+
XCTAssertEqual(#"""
69+
{
70+
"contents" : [
71+
{
72+
"parts" : [
73+
{
74+
"text" : "What is the price of Google stock today"
75+
}
76+
],
77+
"role" : "user"
78+
}
79+
],
80+
"tools" : [
81+
{
82+
"googleSearch" : {
83+
84+
}
85+
}
86+
]
87+
}
88+
"""#,
89+
try requestBody.serialize(pretty: true)
90+
)
91+
}
5592

5693
func testRequestWithSystemInstructionIsEncodable() throws {
5794
let requestBody = GeminiGenerateContentRequestBody(

0 commit comments

Comments
 (0)