diff --git a/FirebaseAI/CHANGELOG.md b/FirebaseAI/CHANGELOG.md index 8a923e651e4..8b821bc9427 100644 --- a/FirebaseAI/CHANGELOG.md +++ b/FirebaseAI/CHANGELOG.md @@ -1,4 +1,6 @@ # 12.7.0 +- [feature] Added support for configuring thinking levels with Gemini 3 series + models and onwards. (#15557) - [fixed] Fixed support for API keys with iOS+ app [Bundle ID restrictions](https://docs.cloud.google.com/docs/authentication/api-keys#adding-application-restrictions) by setting the `x-ios-bundle-identifier` header. (#15475) diff --git a/FirebaseAI/Sources/Types/Public/ThinkingConfig.swift b/FirebaseAI/Sources/Types/Public/ThinkingConfig.swift index a339f8fa1d1..b71fdc66fba 100644 --- a/FirebaseAI/Sources/Types/Public/ThinkingConfig.swift +++ b/FirebaseAI/Sources/Types/Public/ThinkingConfig.swift @@ -14,9 +14,10 @@ /// Configuration for controlling the "thinking" behavior of compatible Gemini models. /// -/// Certain models, like Gemini 2.5 Flash and Pro, utilize a thinking process before generating a -/// response. This allows them to reason through complex problems and plan a more coherent and -/// accurate answer. +/// Gemini 2.5 series models and newer utilize a thinking process before generating a response. This +/// allows them to reason through complex problems and plan a more coherent and accurate answer. +/// See the [thinking documentation](https://firebase.google.com/docs/ai-logic/thinking) for more +/// details. public struct ThinkingConfig: Sendable { /// The thinking budget in tokens. /// @@ -27,16 +28,13 @@ public struct ThinkingConfig: Sendable { /// If you don't specify a budget (`nil`), the model will automatically determine the appropriate /// amount of thinking based on the complexity of the prompt. /// - /// **Model-Specific Behavior:** - /// - **Gemini 2.5 Flash:** The budget can range from `0` to `24576`. Setting the budget to `0` - /// disables the thinking process, which prioritizes the lowest latency and cost. 
- /// - **Gemini 2.5 Pro:** The budget must be an integer between `128` and `32768`. Thinking cannot - /// be disabled for this model. - /// /// An error will be thrown if you set a thinking budget for a model that does not support this /// feature or if the specified budget is not within the model's supported range. let thinkingBudget: Int? + /// The level of thought tokens that the model should generate. + let thinkingLevel: ThinkingLevel? + /// Whether summaries of the model's "thoughts" are included in responses. /// /// When `includeThoughts` is set to `true`, the model will return a summary of its internal @@ -51,13 +49,73 @@ public struct ThinkingConfig: Sendable { /// /// - Parameters: /// - thinkingBudget: The maximum number of tokens to be used for the model's thinking process. + /// The range of [supported thinking budget values + /// ](https://firebase.google.com/docs/ai-logic/thinking#supported-thinking-budget-values) + /// depends on the model. + /// - To use the default thinking budget or thinking level for a model, set this value to + /// `nil` or omit it. + /// - To disable thinking, when supported by the model, set this value to `0`. + /// - To use dynamic thinking, allowing the model to decide on the thinking budget based on + /// the task, set this value to `-1`. /// - includeThoughts: If true, summaries of the model's "thoughts" are included in responses. public init(thinkingBudget: Int? = nil, includeThoughts: Bool? = nil) { self.thinkingBudget = thinkingBudget + thinkingLevel = nil + self.includeThoughts = includeThoughts + } + + /// Initializes a `ThinkingConfig` with a ``ThinkingLevel``. + /// + /// If you don't specify a thinking level, Gemini will use the model's default dynamic thinking + /// level. + /// + /// > Important: Gemini 2.5 series models do not support thinking levels; use + /// > ``init(thinkingBudget:includeThoughts:)`` to set a thinking budget instead. 
+ /// + /// - Parameters: + /// - thinkingLevel: A preset that controls the model's "thinking" process. Use + /// ``ThinkingLevel/low`` for faster responses on less complex tasks, and + /// ``ThinkingLevel/high`` for better reasoning on more complex tasks. + /// - includeThoughts: If true, summaries of the model's "thoughts" are included in responses. + public init(thinkingLevel: ThinkingLevel, includeThoughts: Bool? = nil) { + thinkingBudget = nil + self.thinkingLevel = thinkingLevel self.includeThoughts = includeThoughts } } +public extension ThinkingConfig { + /// A preset that balances the trade-off between reasoning quality and response speed for a + /// model's "thinking" process. + struct ThinkingLevel: EncodableProtoEnum, Equatable { + enum Kind: String { + case minimal = "MINIMAL" + case low = "LOW" + case medium = "MEDIUM" + case high = "HIGH" + } + + /// Use this level when you want to minimize latency, allowing for minimal thought. This + /// level is faster than ``low``. + public static let minimal = ThinkingLevel(kind: .minimal) + + /// This level is suitable for simpler queries or when speed is the priority. This level is + /// faster than ``medium``. + public static let low = ThinkingLevel(kind: .low) + + /// Offers a balanced approach suitable for tasks of moderate complexity that benefit from + /// reasoning but don't require deep, multi-step planning. It provides more reasoning + /// capability than ``low`` while maintaining lower latency than ``high``. + public static let medium = ThinkingLevel(kind: .medium) + + /// Use this level for complex queries where quality is more important than speed. It allows the + /// model to engage in deeper reasoning but increases latency. 
+ public static let high = ThinkingLevel(kind: .high) + + var rawValue: String + } +} + // MARK: - Codable Conformances extension ThinkingConfig: Encodable {} diff --git a/FirebaseAI/Tests/TestApp/Tests/Integration/GenerateContentIntegrationTests.swift b/FirebaseAI/Tests/TestApp/Tests/Integration/GenerateContentIntegrationTests.swift index f8102f70229..aa4123d5a9e 100644 --- a/FirebaseAI/Tests/TestApp/Tests/Integration/GenerateContentIntegrationTests.swift +++ b/FirebaseAI/Tests/TestApp/Tests/Integration/GenerateContentIntegrationTests.swift @@ -170,6 +170,10 @@ struct GenerateContentIntegrationTests { (.googleAI_v1beta, ModelNames.gemini2_5_Pro, ThinkingConfig( thinkingBudget: 32768, includeThoughts: true )), + (.googleAI_v1beta, ModelNames.gemini3FlashPreview, ThinkingConfig(thinkingLevel: .minimal)), + (.googleAI_v1beta, ModelNames.gemini3FlashPreview, ThinkingConfig(thinkingLevel: .low)), + (.googleAI_v1beta, ModelNames.gemini3FlashPreview, ThinkingConfig(thinkingLevel: .medium)), + (.googleAI_v1beta, ModelNames.gemini3FlashPreview, ThinkingConfig(thinkingLevel: .high)), (.googleAI_v1beta, ModelNames.gemini3FlashPreview, ThinkingConfig(thinkingBudget: 128)), (.googleAI_v1beta, ModelNames.gemini3FlashPreview, ThinkingConfig(thinkingBudget: 32768)), (.googleAI_v1beta, ModelNames.gemini3FlashPreview, ThinkingConfig( @@ -229,6 +233,16 @@ struct GenerateContentIntegrationTests { if let thinkingBudget = thinkingConfig.thinkingBudget, thinkingBudget > 0 { #expect(usageMetadata.thoughtsTokenCount > 0) #expect(usageMetadata.thoughtsTokenCount <= thinkingBudget) + } else if let thinkingLevel = thinkingConfig.thinkingLevel { + // For gemini3FlashPreview, repeated runs show that for any of the four + // levels, 64 or 68 may be returned. 
+ let minThoughtTokens = 64 + switch thinkingLevel { + case .minimal, .low, .medium, .high: + #expect(usageMetadata.thoughtsTokenCount >= minThoughtTokens) + default: + Issue.record("Unhandled ThinkingLevel: \(thinkingLevel)") + } } else { #expect(usageMetadata.thoughtsTokenCount == 0) } diff --git a/FirebaseAI/Tests/Unit/GenerationConfigTests.swift b/FirebaseAI/Tests/Unit/GenerationConfigTests.swift index edbde87fc7d..e02e765b757 100644 --- a/FirebaseAI/Tests/Unit/GenerationConfigTests.swift +++ b/FirebaseAI/Tests/Unit/GenerationConfigTests.swift @@ -234,4 +234,40 @@ final class GenerationConfigTests: XCTestCase { } """) } + + func testEncodeGenerationConfig_thinkingConfig() throws { + let testCases: [(ThinkingConfig, String)] = [ + (ThinkingConfig(thinkingBudget: 0), "\"thinkingBudget\" : 0"), + (ThinkingConfig(thinkingBudget: 1024), "\"thinkingBudget\" : 1024"), + (ThinkingConfig(thinkingBudget: 1024, includeThoughts: true), """ + "includeThoughts" : true, + "thinkingBudget" : 1024 + """), + (ThinkingConfig(thinkingLevel: .minimal), "\"thinkingLevel\" : \"MINIMAL\""), + (ThinkingConfig(thinkingLevel: .low), "\"thinkingLevel\" : \"LOW\""), + (ThinkingConfig(thinkingLevel: .medium), "\"thinkingLevel\" : \"MEDIUM\""), + (ThinkingConfig(thinkingLevel: .high), "\"thinkingLevel\" : \"HIGH\""), + (ThinkingConfig(thinkingLevel: .medium, includeThoughts: true), """ + "includeThoughts" : true, + "thinkingLevel" : \"MEDIUM\" + """), + (ThinkingConfig(thinkingLevel: .medium, includeThoughts: false), """ + "includeThoughts" : false, + "thinkingLevel" : \"MEDIUM\" + """), + ] + + for (thinkingConfig, expectedJSONSnippet) in testCases { + let generationConfig = GenerationConfig(thinkingConfig: thinkingConfig) + let jsonData = try encoder.encode(generationConfig) + let json = try XCTUnwrap(String(data: jsonData, encoding: .utf8)) + XCTAssertEqual(json, """ + { + "thinkingConfig" : { + \(expectedJSONSnippet) + } + } + """) + } + } }