Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions FirebaseAI/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# 12.7.0
- [feature] Added support for configuring thinking levels with Gemini 3 series
models and later. (#15557)
- [fixed] Fixed support for API keys with iOS+ app
[Bundle ID restrictions](https://docs.cloud.google.com/docs/authentication/api-keys#adding-application-restrictions)
by setting the `x-ios-bundle-identifier` header. (#15475)
Expand Down
76 changes: 67 additions & 9 deletions FirebaseAI/Sources/Types/Public/ThinkingConfig.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@

/// Configuration for controlling the "thinking" behavior of compatible Gemini models.
///
/// Certain models, like Gemini 2.5 Flash and Pro, utilize a thinking process before generating a
/// response. This allows them to reason through complex problems and plan a more coherent and
/// accurate answer.
/// Gemini 2.5 series models and newer utilize a thinking process before generating a response. This
/// allows them to reason through complex problems and plan a more coherent and accurate answer.
/// See the [thinking documentation](https://firebase.google.com/docs/ai-logic/thinking) for more
/// details.
public struct ThinkingConfig: Sendable {
/// The thinking budget in tokens.
///
Expand All @@ -27,16 +28,13 @@ public struct ThinkingConfig: Sendable {
/// If you don't specify a budget (`nil`), the model will automatically determine the appropriate
/// amount of thinking based on the complexity of the prompt.
///
/// **Model-Specific Behavior:**
/// - **Gemini 2.5 Flash:** The budget can range from `0` to `24576`. Setting the budget to `0`
/// disables the thinking process, which prioritizes the lowest latency and cost.
/// - **Gemini 2.5 Pro:** The budget must be an integer between `128` and `32768`. Thinking cannot
/// be disabled for this model.
///
/// An error will be thrown if you set a thinking budget for a model that does not support this
/// feature or if the specified budget is not within the model's supported range.
let thinkingBudget: Int?

/// The level of thoughts tokens that the model should generate.
let thinkingLevel: ThinkingLevel?

/// Whether summaries of the model's "thoughts" are included in responses.
///
/// When `includeThoughts` is set to `true`, the model will return a summary of its internal
Expand All @@ -51,13 +49,73 @@ public struct ThinkingConfig: Sendable {
///
/// - Parameters:
/// - thinkingBudget: The maximum number of tokens to be used for the model's thinking process.
/// The range of [supported thinking budget values
/// ](https://firebase.google.com/docs/ai-logic/thinking#supported-thinking-budget-values)
/// depends on the model.
/// - To use the default thinking budget or thinking level for a model, set this value to
/// `nil` or omit it.
/// - To disable thinking, when supported by the model, set this value to `0`.
/// - To use dynamic thinking, allowing the model to decide on the thinking budget based on
/// the task, set this value to `-1`.
/// - includeThoughts: If true, summaries of the model's "thoughts" are included in responses.
public init(thinkingBudget: Int? = nil, includeThoughts: Bool? = nil) {
self.thinkingBudget = thinkingBudget
thinkingLevel = nil
self.includeThoughts = includeThoughts
}

/// Initializes a `ThinkingConfig` with a ``ThinkingLevel``.
///
/// If you don't specify a thinking level, Gemini will use the model's default dynamic thinking
/// level.
///
/// > Important: Gemini 2.5 series models do not support thinking levels; use
/// > ``init(thinkingBudget:includeThoughts:)`` to set a thinking budget instead.
///
/// - Parameters:
/// - thinkingLevel: A preset that controls the model's "thinking" process. Use
/// ``ThinkingLevel/low`` for faster responses on less complex tasks, and
/// ``ThinkingLevel/high`` for better reasoning on more complex tasks.
/// - includeThoughts: If true, summaries of the model's "thoughts" are included in responses.
public init(thinkingLevel: ThinkingLevel, includeThoughts: Bool? = nil) {
thinkingBudget = nil
self.thinkingLevel = thinkingLevel
self.includeThoughts = includeThoughts
}
}

public extension ThinkingConfig {
/// A preset that balances the trade-off between reasoning quality and response speed for a
/// model's "thinking" process.
struct ThinkingLevel: EncodableProtoEnum, Equatable {
// Raw wire values for the thinking level. NOTE(review): `EncodableProtoEnum` is a
// project protocol — presumably it synthesizes `init(kind:)` and encodes `rawValue`
// as one of these strings; confirm against its declaration.
enum Kind: String {
case minimal = "MINIMAL"
case low = "LOW"
case medium = "MEDIUM"
case high = "HIGH"
}

/// Use this level when you want to minimize latency, allowing for minimal thought. This
/// level is faster than ``low``.
public static let minimal = ThinkingLevel(kind: .minimal)

/// This level is suitable for simpler queries or when speed is the priority. This level is
/// faster than ``medium``.
public static let low = ThinkingLevel(kind: .low)

/// Offers a balanced approach suitable for tasks of moderate complexity that benefit from
/// reasoning but don't require deep, multi-step planning. It provides more reasoning
/// capability than ``low`` while maintaining lower latency than ``high``.
public static let medium = ThinkingLevel(kind: .medium)

/// Use this level for complex queries where quality is more important than speed. It allows the
/// model to engage in deeper reasoning but increases latency.
public static let high = ThinkingLevel(kind: .high)

// Backing storage required by `EncodableProtoEnum`; holds the proto enum string
// (e.g., "LOW") corresponding to one of the `Kind` cases above.
var rawValue: String
}
}

// MARK: - Codable Conformances

// Compiler-synthesized Encodable conformance. NOTE(review): with synthesized conformance,
// optional stored properties are encoded via `encodeIfPresent`, so `nil` fields
// (e.g., an unset `thinkingBudget` or `thinkingLevel`) are omitted from the JSON payload
// rather than serialized as `null` — the unit tests below rely on that omission.
extension ThinkingConfig: Encodable {}
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,10 @@ struct GenerateContentIntegrationTests {
(.googleAI_v1beta, ModelNames.gemini2_5_Pro, ThinkingConfig(
thinkingBudget: 32768, includeThoughts: true
)),
(.googleAI_v1beta, ModelNames.gemini3FlashPreview, ThinkingConfig(thinkingLevel: .minimal)),
(.googleAI_v1beta, ModelNames.gemini3FlashPreview, ThinkingConfig(thinkingLevel: .low)),
(.googleAI_v1beta, ModelNames.gemini3FlashPreview, ThinkingConfig(thinkingLevel: .medium)),
(.googleAI_v1beta, ModelNames.gemini3FlashPreview, ThinkingConfig(thinkingLevel: .high)),
(.googleAI_v1beta, ModelNames.gemini3FlashPreview, ThinkingConfig(thinkingBudget: 128)),
(.googleAI_v1beta, ModelNames.gemini3FlashPreview, ThinkingConfig(thinkingBudget: 32768)),
(.googleAI_v1beta, ModelNames.gemini3FlashPreview, ThinkingConfig(
Expand Down Expand Up @@ -229,6 +233,16 @@ struct GenerateContentIntegrationTests {
if let thinkingBudget = thinkingConfig.thinkingBudget, thinkingBudget > 0 {
#expect(usageMetadata.thoughtsTokenCount > 0)
#expect(usageMetadata.thoughtsTokenCount <= thinkingBudget)
} else if let thinkingLevel = thinkingConfig.thinkingLevel {
// For gemini3FlashPreview, repeated runs show that for any of the four
// levels, 64 or 68 may be returned.
let minThoughtTokens = 64
switch thinkingLevel {
case .minimal, .low, .medium, .high:
#expect(usageMetadata.thoughtsTokenCount >= minThoughtTokens)
default:
Issue.record("Unhandled ThinkingLevel: \(thinkingLevel)")
}
} else {
#expect(usageMetadata.thoughtsTokenCount == 0)
}
Expand Down
36 changes: 36 additions & 0 deletions FirebaseAI/Tests/Unit/GenerationConfigTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -234,4 +234,40 @@ final class GenerationConfigTests: XCTestCase {
}
""")
}

/// Verifies that `GenerationConfig` serializes each `ThinkingConfig` variant to the
/// expected `thinkingConfig` JSON object: budget-based configs emit `thinkingBudget`,
/// level-based configs emit the proto enum string under `thinkingLevel`, and
/// `includeThoughts` appears only when explicitly set. Unset optionals must be
/// omitted entirely (not encoded as `null`).
func testEncodeGenerationConfig_thinkingConfig() throws {
// Each case pairs a config with the exact JSON snippet expected inside the
// `thinkingConfig` object. Keys appear alphabetically because the shared
// `encoder` presumably uses `.sortedKeys` — TODO confirm its configuration.
let testCases: [(ThinkingConfig, String)] = [
(ThinkingConfig(thinkingBudget: 0), "\"thinkingBudget\" : 0"),
(ThinkingConfig(thinkingBudget: 1024), "\"thinkingBudget\" : 1024"),
(ThinkingConfig(thinkingBudget: 1024, includeThoughts: true), """
"includeThoughts" : true,
"thinkingBudget" : 1024
"""),
(ThinkingConfig(thinkingLevel: .minimal), "\"thinkingLevel\" : \"MINIMAL\""),
(ThinkingConfig(thinkingLevel: .low), "\"thinkingLevel\" : \"LOW\""),
(ThinkingConfig(thinkingLevel: .medium), "\"thinkingLevel\" : \"MEDIUM\""),
(ThinkingConfig(thinkingLevel: .high), "\"thinkingLevel\" : \"HIGH\""),
(ThinkingConfig(thinkingLevel: .medium, includeThoughts: true), """
"includeThoughts" : true,
"thinkingLevel" : \"MEDIUM\"
"""),
(ThinkingConfig(thinkingLevel: .medium, includeThoughts: false), """
"includeThoughts" : false,
"thinkingLevel" : \"MEDIUM\"
"""),
]

// Round-trip each config through the encoder and compare the full JSON document,
// not just the snippet, so unexpected extra keys would also fail the test.
for (thinkingConfig, expectedJSONSnippet) in testCases {
let generationConfig = GenerationConfig(thinkingConfig: thinkingConfig)
let jsonData = try encoder.encode(generationConfig)
let json = try XCTUnwrap(String(data: jsonData, encoding: .utf8))
XCTAssertEqual(json, """
{
"thinkingConfig" : {
\(expectedJSONSnippet)
}
}
""")
}
}
}
Loading