Skip to content

Commit 07ee2f9

Browse files
authored
Add Imagen 3 support through Gemini API (#118)
1 parent c9689ae commit 07ee2f9

8 files changed

+265
-5
lines changed

README.md

+46
Original file line numberDiff line numberDiff line change
@@ -1853,6 +1853,52 @@ Use the file URL returned from the snippet above.
18531853
}
18541854
```
18551855

1856+
### How to generate an image with Imagen
1857+
1858+
```swift
1859+
import AIProxy
1860+
1861+
/* Uncomment for BYOK use cases */
1862+
// let geminiService = AIProxy.geminiDirectService(
1863+
// unprotectedAPIKey: "your-gemini-key"
1864+
// )
1865+
1866+
/* Uncomment for all other production use cases */
1867+
// let geminiService = AIProxy.geminiService(
1868+
// partialKey: "partial-key-from-your-developer-dashboard",
1869+
// serviceURL: "service-url-from-your-developer-dashboard"
1870+
// )
1871+
1872+
let requestBody = GeminiImagenRequestBody(
1873+
instances: [
1874+
.init(prompt: prompt)
1875+
],
1876+
parameters: .init(
1877+
personGeneration: .allowAdult,
1878+
safetyLevel: .blockNone,
1879+
sampleCount: 1
1880+
)
1881+
)
1882+
1883+
do {
1884+
let response = try await geminiService.makeImagenRequest(
1885+
body: requestBody,
1886+
model: "imagen-3.0-generate-002"
1887+
)
1888+
if let base64Data = response.predictions.first?.bytesBase64Encoded,
1889+
let imageData = Data(base64Encoded: base64Data),
1890+
let image = UIImage(data: imageData) {
1891+
// Do something with image
1892+
} else {
1893+
print("Imagen response did not include base64 image data")
1894+
}
1895+
} catch AIProxyError.unsuccessfulRequest(let statusCode, let responseBody) {
1896+
print("Received \(statusCode) status code with response body: \(responseBody)")
1897+
} catch {
1898+
print("Could not create Imagen image: \(error.localizedDescription)")
1899+
}
1900+
```
1901+
18561902

18571903
***
18581904

Sources/AIProxy/AIProxy.swift

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import UIKit
88
public struct AIProxy {
99

1010
/// The current sdk version
11-
public static let sdkVersion = "0.77.0"
11+
public static let sdkVersion = "0.78.0"
1212

1313
/// - Parameters:
1414
/// - partialKey: Your partial key is displayed in the AIProxy dashboard when you submit your provider's key.

Sources/AIProxy/Gemini/GeminiDirectService.swift

+19
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,25 @@ open class GeminiDirectService: GeminiService, DirectService {
4343
return try await self.makeRequestAndDeserializeResponse(request)
4444
}
4545

46+
/// Generates images with the Imagen API by calling Google's endpoint directly.
///
/// The request is a JSON `POST` to `/v1beta/models/<model>:predict` on
/// `generativelanguage.googleapis.com`, authenticated with the service's
/// unprotected API key in the `X-Goog-Api-Key` header.
///
/// - Parameters:
///   - body: The Imagen request body to serialize and send.
///   - model: The model name interpolated into the request path,
///     e.g. `imagen-3.0-generate-002`.
/// - Returns: The deserialized Imagen response body.
/// - Throws: Any error raised while building the request, performing it,
///   or deserializing the response.
public func makeImagenRequest(
    body: GeminiImagenRequestBody,
    model: String
) async throws -> GeminiImagenResponseBody {
    let predictPath = "/v1beta/models/\(model):predict"
    let urlRequest = try AIProxyURLRequest.createDirect(
        baseURL: "https://generativelanguage.googleapis.com",
        path: predictPath,
        body: body.serialize(),
        verb: .post,
        contentType: "application/json",
        additionalHeaders: [
            "X-Goog-Api-Key": self.unprotectedAPIKey
        ]
    )
    return try await self.makeRequestAndDeserializeResponse(urlRequest)
}
64+
4665
/// Uploads a file to Google's short term storage.
4766
///
4867
/// The File API lets you store up to 20 GB of files per project, with a per-file maximum
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
//
2+
// GeminiImagenRequestBody.swift
3+
// AIProxy
4+
//
5+
// Created by Lou Zell on 3/18/25.
6+
//
7+
8+
import Foundation
9+
10+
/// Request body for Imagen image generation.
///
/// Prompt-writing advice is in the Imagen
/// [prompt guide](https://ai.google.dev/gemini-api/docs/imagen-prompt-guide).
///
/// Imagen is described in a few places:
///   - https://ai.google.dev/gemini-api/docs/image-generation#imagen
///   - https://cloud.google.com/vertex-ai/generative-ai/docs/image/model-versioning
///   - https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/imagen-api
///
/// It is unclear whether you first need to request access for some features. See
/// https://cloud.google.com/vertex-ai/generative-ai/docs/image/overview?authuser=1
///   - Request access: Imagen 3 Customization and Editing
///   - Request access: Person and face generation
public struct GeminiImagenRequestBody: Encodable {
    /// The generation inputs; each instance carries a prompt and an optional input image.
    public let instances: [Instance]

    /// Settings that apply to the whole request (sample count, safety level, and so on).
    public let parameters: Parameters

    /// Creates a request body from its instances and parameters.
    public init(instances: [Instance], parameters: Parameters) {
        self.instances = instances
        self.parameters = parameters
    }
}
33+
34+
extension GeminiImagenRequestBody {
    /// A single generation input: a text prompt plus an optional input image.
    public struct Instance: Encodable {
        /// The text prompt.
        public let prompt: String

        /// An optional input image. The initializer defaults this to `nil`.
        public let image: InputImage?

        public init(prompt: String, image: InputImage? = nil) {
            self.prompt = prompt
            self.image = image
        }
    }

    /// Request-wide generation settings. Only `sampleCount` is required;
    /// every other setting defaults to `nil` in the initializer.
    public struct Parameters: Encodable {
        /// Supported values are "1:1", "3:4", "4:3", "9:16", and "16:9". The default is "1:1".
        public let aspectRatio: String?

        /// One valid mode is 'upscale'. The other valid modes are not known.
        public let mode: String?

        /// Controls whether people may appear in generated images:
        ///
        ///   - "dont_allow": Disallow the inclusion of people or faces in images.
        ///   - "allow_adult": Allow generation of adults only.
        ///   - "allow_all": Allow generation of people of all ages.
        ///
        /// The default value is "allow_adult".
        public let personGeneration: PersonGeneration?

        /// Adds a filter level to safety filtering. The following values are supported:
        ///
        ///   - "block_low_and_above": Strongest filtering level, most strict blocking.
        ///   - "block_medium_and_above": Block some problematic prompts and responses.
        ///   - "block_only_high": Reduces the number of requests blocked due to safety
        ///     filters. May increase objectionable content generated by Imagen.
        ///   - "block_none": Block very few problematic prompts and responses. Access to
        ///     this feature is restricted.
        ///
        /// The default value is "block_medium_and_above".
        ///
        /// NOTE(review): this property is encoded under the JSON key `safetyLevel`
        /// (taken from the property name). The Vertex AI Imagen API reference appears to
        /// call this parameter `safetySetting` - confirm the endpoint honors this key.
        public let safetyLevel: SafetyLevel?

        /// The number of images to create
        public let sampleCount: Int

        /// Optional upscaling configuration.
        public let upscaleConfig: UpscaleConfig?

        public init(
            aspectRatio: String? = nil,
            mode: String? = nil,
            personGeneration: PersonGeneration? = nil,
            safetyLevel: SafetyLevel? = nil,
            sampleCount: Int,
            upscaleConfig: UpscaleConfig? = nil
        ) {
            self.aspectRatio = aspectRatio
            self.mode = mode
            self.personGeneration = personGeneration
            self.safetyLevel = safetyLevel
            self.sampleCount = sampleCount
            self.upscaleConfig = upscaleConfig
        }
    }
}
90+
91+
92+
extension GeminiImagenRequestBody.Parameters {
    /// The safety filtering level applied for content moderation.
    ///
    /// Each case encodes as its snake_case raw string.
    public enum SafetyLevel: String, Encodable {
        /// Strongest filtering level, most strict blocking.
        /// Deprecated value: "block_most".
        case blockLowAndAbove = "block_low_and_above"

        /// Block some problematic prompts and responses.
        /// Deprecated value: "block_some".
        case blockMediumAndAbove = "block_medium_and_above"

        /// Reduces the number of requests blocked due to safety filters.
        /// May increase objectionable content generated by Imagen.
        /// Deprecated value: "block_few".
        case blockOnlyHigh = "block_only_high"

        /// Block very few problematic prompts and responses.
        /// Access to this feature is restricted.
        /// Previous field value: "block_fewest".
        case blockNone = "block_none"
    }
}
110+
111+
extension GeminiImagenRequestBody.Parameters {
    /// Whether, and which, people or faces may appear in generated images.
    ///
    /// Each case encodes as its snake_case raw string.
    public enum PersonGeneration: String, Encodable {
        /// Disallow the inclusion of people or faces in images.
        case dontAllow = "dont_allow"

        /// Allow generation of adults only.
        case allowAdult = "allow_adult"

        /// Allow generation of people of all ages.
        case allowAll = "allow_all"
    }
}
124+
125+
extension GeminiImagenRequestBody.Instance {
    /// An input image attached to an instance.
    ///
    /// The raw bytes are held as `Data` and written out as a base64 string under
    /// the `bytesBase64Encoded` key when encoded.
    public struct InputImage: Encodable {
        /// The raw image bytes.
        public let data: Data

        public init(data: Data) {
            self.data = data
        }

        /// The only key written by `encode(to:)`; `data` itself is never encoded as-is.
        private enum CodingKeys: String, CodingKey {
            case bytesBase64Encoded
        }

        /// Encodes the image as `{"bytesBase64Encoded": "<base64 of data>"}`.
        public func encode(to encoder: any Encoder) throws {
            let base64Text = self.data.base64EncodedString()
            var keyed = encoder.container(keyedBy: CodingKeys.self)
            try keyed.encode(base64Text, forKey: .bytesBase64Encoded)
        }
    }
}
143+
144+
extension GeminiImagenRequestBody.Parameters {
    /// Upscaling configuration for a request.
    public struct UpscaleConfig: Encodable {
        /// The factor to upscale by.
        public let upscaleFactor: UpscaleFactor

        public init(upscaleFactor: UpscaleFactor) {
            self.upscaleFactor = upscaleFactor
        }

        /// Supported upscale factors. With no explicit raw values, each case
        /// encodes as its own name ("x2", "x4").
        public enum UpscaleFactor: String, Encodable {
            case x2, x4
        }
    }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
//
2+
// GeminiImagenResponseBody.swift
3+
// AIProxy
4+
//
5+
// Created by Lou Zell on 3/18/25.
6+
//
7+
8+
/// Response body returned by an Imagen `:predict` request.
public struct GeminiImagenResponseBody: Decodable {
    /// One entry per returned image.
    ///
    /// This is empty when the response has no `predictions` key, or when the key
    /// is `null`. Callers can treat "no images returned" as an empty list rather
    /// than as a decoding failure.
    public let predictions: [Prediction]
}

extension GeminiImagenResponseBody {
    private enum CodingKeys: String, CodingKey {
        case predictions
    }

    /// Decodes the response, tolerating a missing or `null` `predictions` key.
    ///
    /// This initializer lives in an extension so the struct keeps its implicit
    /// memberwise initializer.
    ///
    /// - Throws: `DecodingError` when `predictions` is present but malformed,
    ///   for example an element that lacks `bytesBase64Encoded`.
    public init(from decoder: any Decoder) throws {
        let container = try decoder.container(keyedBy: CodingKeys.self)
        // A response without `predictions` (e.g. `{}`) is treated as zero images
        // rather than raising `keyNotFound`.
        self.predictions = try container.decodeIfPresent(
            [Prediction].self,
            forKey: .predictions
        ) ?? []
    }
}

extension GeminiImagenResponseBody {
    /// A single generated image.
    public struct Prediction: Decodable {
        /// The MIME type reported for the image, if the response includes one.
        public let mimeType: String?

        /// The image bytes as a base64-encoded string.
        public let bytesBase64Encoded: String
    }
}

Sources/AIProxy/Gemini/GeminiProxiedService.swift

+18
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,24 @@ open class GeminiProxiedService: GeminiService, ProxiedService {
4444
return try await self.makeRequestAndDeserializeResponse(request)
4545
}
4646

47+
/// Generates images with the Imagen API, routed through the AIProxy service.
///
/// The request is a JSON `POST` whose proxy path is
/// `/v1beta/models/<model>:predict`. It is built from this service's partial key,
/// service URL, and client ID.
///
/// - Parameters:
///   - body: The Imagen request body to serialize and send.
///   - model: The model name interpolated into the proxy path,
///     e.g. `imagen-3.0-generate-002`.
/// - Returns: The deserialized Imagen response body.
/// - Throws: Any error raised while building the request, performing it,
///   or deserializing the response.
public func makeImagenRequest(
    body: GeminiImagenRequestBody,
    model: String
) async throws -> GeminiImagenResponseBody {
    let predictPath = "/v1beta/models/\(model):predict"
    let urlRequest = try await AIProxyURLRequest.create(
        partialKey: self.partialKey,
        serviceURL: self.serviceURL,
        clientID: self.clientID,
        proxyPath: predictPath,
        body: body.serialize(),
        verb: .post,
        contentType: "application/json"
    )
    return try await self.makeRequestAndDeserializeResponse(urlRequest)
}
64+
4765
/// Uploads a file to Google's short term storage.
4866
///
4967
/// The File API lets you store up to 20 GB of files per project, with a per-file maximum

Sources/AIProxy/Gemini/GeminiService.swift

+7
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,13 @@ public protocol GeminiService {
2121
model: String
2222
) async throws -> GeminiGenerateContentResponseBody
2323

24+
/// Generates images with the Imagen API.
///
/// - Parameters:
///   - body: The Imagen request body (prompt instances plus generation parameters).
///   - model: The Imagen model name, e.g. `imagen-3.0-generate-002`.
/// - Returns: The decoded Imagen response body.
func makeImagenRequest(body: GeminiImagenRequestBody, model: String) async throws -> GeminiImagenResponseBody
29+
30+
2431
/// Uploads a file to Google's short term storage.
2532
///
2633
/// The File API lets you store up to 20 GB of files per project, with a per-file maximum

Tests/AIProxyTests/GeminiGenerateImageResponseTests.swift

-4
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,4 @@ final class GeminiGenerateImageResponseTests: XCTestCase {
5454
XCTFail()
5555
}
5656
}
57-
5857
}
59-
60-
61-

0 commit comments

Comments
 (0)