Skip to content

Commit 8bf4d82

Browse files
authored
Add Gemini image generation support (#117)
1 parent d1f1190 commit 8bf4d82

File tree

3 files changed

+125
-0
lines changed

3 files changed

+125
-0
lines changed

README.md

+55
Original file line number | Diff line number | Diff line change
@@ -1791,6 +1791,61 @@ Use the file URL returned from the snippet above.
17911791
}
17921792
```
17931793

1794+
### How to generate an image with Gemini
1795+
1796+
```swift
1797+
import AIProxy
1798+
1799+
/* Uncomment for BYOK use cases */
1800+
// let geminiService = AIProxy.geminiDirectService(
1801+
// unprotectedAPIKey: "your-gemini-key"
1802+
// )
1803+
1804+
/* Uncomment for all other production use cases */
1805+
// let geminiService = AIProxy.geminiService(
1806+
// partialKey: "partial-key-from-your-developer-dashboard",
1807+
// serviceURL: "service-url-from-your-developer-dashboard"
1808+
// )
1809+
1810+
// Build a request holding one user text prompt and ask for both "Text" and "Image" response modalities.
let requestBody = GeminiGenerateContentRequestBody(
1811+
contents: [
1812+
.init(
1813+
parts: [
1814+
.text(
1815+
"""
1816+
Hi, can you create a 3d rendered image of a pig with wings and a top hat
1817+
flying over a happy futuristic scifi city with lots of greenery?
1818+
"""
1819+
)
1820+
],
1821+
role: "user"
1822+
)
1823+
],
1824+
generationConfig: .init(
1825+
responseModalities: [
1826+
"Text",
1827+
"Image"
1828+
]
1829+
)
1830+
)
1831+
1832+
// `geminiService` is defined by whichever of the two commented-out initializers above is uncommented.
do {
1833+
let response = try await geminiService.generateContentRequest(
1834+
body: requestBody,
1835+
model: "gemini-2.0-flash-exp-image-generation"
1836+
)
1837+
// Only the first candidate's parts are inspected; a missing candidate, content, or parts list falls back to an empty array.
for part in response.candidates?.first?.content?.parts ?? [] {
1838+
// Parts that do not match `.inlineData` are skipped without any output.
if case .inlineData(mimeType: let mimeType, base64Data: let base64Data) = part {
1839+
print("Gemini generated inline data with mimetype: \(mimeType) and base64Length: \(base64Data.count)")
1840+
}
1841+
}
1842+
// Handles `AIProxyError.unsuccessfulRequest` by printing its status code and response body.
} catch AIProxyError.unsuccessfulRequest(let statusCode, let responseBody) {
1843+
print("Received \(statusCode) status code with response body: \(responseBody)")
1844+
// Any other error is reported through its localized description.
} catch {
1845+
print("Could not create image using gemini: \(error.localizedDescription)")
1846+
}
1847+
```
1848+
17941849

17951850
***
17961851

Sources/AIProxy/Gemini/GeminiGenerateContentResponseBody.swift

+9
Original file line number | Diff line number | Diff line change
@@ -65,21 +65,30 @@ extension GeminiGenerateContentResponseBody.Candidate.Content {
6565
public enum Part: Decodable {
6666
case text(String)
6767
case functionCall(name: String, args: [String: Any]?)
68+
case inlineData(mimeType: String, base64Data: String)
6869

6970
/// Keys probed by `init(from:)`: `functionCall` first, then `inlineData`, with `text` as the required fallback.
private enum CodingKeys: String, CodingKey {
7071
case text
7172
case functionCall
73+
case inlineData
7274
}
7375

7476
/// Decoding helper for the object stored under the `functionCall` key.
private struct _FunctionCall: Decodable {
7577
let name: String
7678
/// Optional arguments; `init(from:)` exposes them through `untypedDictionary`.
let args: [String: AIProxyJSONValue]?
7779
}
7880

81+
/// Decoding helper for the object stored under the `inlineData` key.
private struct _InlineData: Decodable {
82+
/// Surfaced as the `mimeType` associated value of `.inlineData`.
let mimeType: String
83+
/// Surfaced as the `base64Data` associated value of `.inlineData`.
let data: String
84+
}
85+
7986
public init(from decoder: any Decoder) throws {
8087
let container = try decoder.container(keyedBy: CodingKeys.self)
8188
if let functionCall = try container.decodeIfPresent(_FunctionCall.self, forKey: .functionCall) {
8289
self = .functionCall(name: functionCall.name, args: functionCall.args?.untypedDictionary)
90+
} else if let inlineData = try container.decodeIfPresent(_InlineData.self, forKey: .inlineData) {
91+
self = .inlineData(mimeType: inlineData.mimeType, base64Data: inlineData.data)
8392
} else {
8493
self = .text(try container.decode(String.self, forKey: .text))
8594
}
GeminiGenerateImageResponseTests.swift

+61
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,61 @@
1+
//
2+
// GeminiGenerateImageResponseTests.swift
3+
// AIProxy
4+
//
5+
// Created by Lou Zell on 3/17/25.
6+
//
7+
8+
import XCTest
9+
import Foundation
10+
@testable import AIProxy
11+
12+
13+
/// Checks that a generate-content response whose part is `inlineData` decodes into the `.inlineData` case.
final class GeminiGenerateImageResponseTests: XCTestCase {
14+
15+
/// Deserializes a canned response and asserts that the first part's mime type and data come through unchanged.
func testResponseIsDecodable() throws {
16+
let sampleResponse = #"""
17+
{
18+
"candidates": [
19+
{
20+
"content": {
21+
"parts": [
22+
{
23+
"inlineData": {
24+
"mimeType": "image/png",
25+
"data": "<snip>"
26+
}
27+
}
28+
],
29+
"role": "model"
30+
},
31+
"finishReason": "STOP",
32+
"index": 0
33+
}
34+
],
35+
"usageMetadata": {
36+
"promptTokenCount": 36,
37+
"totalTokenCount": 36,
38+
"promptTokensDetails": [
39+
{
40+
"modality": "TEXT",
41+
"tokenCount": 36
42+
}
43+
]
44+
},
45+
"modelVersion": "gemini-2.0-flash-exp-image-generation"
46+
}
47+
"""#
48+
49+
// `<snip>` is the placeholder used for the `data` payload in the fixture above.
let body = try GeminiGenerateContentResponseBody.deserialize(from: sampleResponse)
50+
if case .inlineData(mimeType: let mimeType, base64Data: let b64Data) = body.candidates?.first?.content?.parts?.first {
51+
XCTAssertEqual("image/png", mimeType)
52+
XCTAssertEqual("<snip>", b64Data)
53+
} else {
54+
// Reached when the first part is missing or is not `.inlineData`.
XCTFail()
55+
}
56+
}
57+
58+
}
59+
60+
61+

0 commit comments

Comments
 (0)