fix: gemini maxOutputTokens and reasoning config (#9375)

hannesrudolph · web-flow · commit 55e9c880d02c · 2025-11-18T19:52:50.000-05:00
* fix: gemini maxOutputTokens and reasoning config

* test: tighten gemini reasoning typings
diff --git a/src/api/providers/gemini.ts b/src/api/providers/gemini.ts
@@ -72,6 +72,15 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 		this.lastThoughtSignature = undefined
 		this.lastResponseId = undefined
 
+		// For hybrid/budget reasoning models (e.g. Gemini 2.5 Pro), respect user-configured
+		// modelMaxTokens so the ThinkingBudget slider can control the cap. For effort-only or
+		// standard models (like gemini-3-pro-preview), ignore any stale modelMaxTokens and
+		// default to the model's computed maxTokens from getModelMaxOutputTokens.
+		const isHybridReasoningModel = info.supportsReasoningBudget || info.requiredReasoningBudget
+		const maxOutputTokens = isHybridReasoningModel
+			? (this.options.modelMaxTokens ?? maxTokens ?? undefined)
+			: (maxTokens ?? undefined)
+
 		// Only forward encrypted reasoning continuations (thoughtSignature) when we are
 		// using effort-based reasoning (thinkingLevel). Budget-only configs should NOT
 		// send thoughtSignature parts back to Gemini.
@@ -119,13 +128,12 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 			systemInstruction,
 			httpOptions: this.options.googleGeminiBaseUrl ? { baseUrl: this.options.googleGeminiBaseUrl } : undefined,
 			thinkingConfig,
-			maxOutputTokens: this.options.modelMaxTokens ?? maxTokens ?? undefined,
+			maxOutputTokens,
 			temperature: temperatureConfig,
 			...(tools.length > 0 ? { tools } : {}),
 		}
 
 		const params: GenerateContentParameters = { model, contents, config }
-
 		try {
 			const result = await this.client.models.generateContentStream(params)
 
diff --git a/src/api/transform/__tests__/reasoning.spec.ts b/src/api/transform/__tests__/reasoning.spec.ts
@@ -7,11 +7,13 @@ import {
 	getAnthropicReasoning,
 	getOpenAiReasoning,
 	getRooReasoning,
+	getGeminiReasoning,
 	GetModelReasoningOptions,
 	OpenRouterReasoningParams,
 	AnthropicReasoningParams,
 	OpenAiReasoningParams,
 	RooReasoningParams,
+	GeminiReasoningParams,
 } from "../reasoning"
 
 describe("reasoning.ts", () => {
@@ -587,6 +589,61 @@ describe("reasoning.ts", () => {
 		})
 	})
 
+	describe("Gemini reasoning (effort models)", () => {
+		it("should return thinkingLevel when effort is set to low or high and budget is not used", () => {
+			const geminiModel: ModelInfo = {
+				...baseModel,
+				// Effort-only reasoning model (no budget fields)
+				supportsReasoningEffort: ["low", "high"] as ModelInfo["supportsReasoningEffort"],
+				reasoningEffort: "low",
+			}
+
+			const settings: ProviderSettings = {
+				apiProvider: "gemini",
+				enableReasoningEffort: true,
+				reasoningEffort: "high",
+			}
+
+			const options: GetModelReasoningOptions = {
+				model: geminiModel,
+				reasoningBudget: 2048,
+				reasoningEffort: "high",
+				settings,
+			}
+
+			const result = getGeminiReasoning(options) as GeminiReasoningParams | undefined
+
+			// Budget should not be used for effort-only models
+			expect(result).toEqual({ thinkingLevel: "high", includeThoughts: true })
+		})
+
+		it("should still return thinkingLevel when enableReasoningEffort is false but effort is explicitly set", () => {
+			const geminiModel: ModelInfo = {
+				...baseModel,
+				// Effort-only reasoning model
+				supportsReasoningEffort: ["low", "high"] as ModelInfo["supportsReasoningEffort"],
+				reasoningEffort: "low",
+			}
+
+			const settings: ProviderSettings = {
+				apiProvider: "gemini",
+				// Even with this flag false, an explicit effort selection should win
+				enableReasoningEffort: false,
+				reasoningEffort: "high",
+			}
+
+			const options: GetModelReasoningOptions = {
+				model: geminiModel,
+				reasoningBudget: 2048,
+				reasoningEffort: "high",
+				settings,
+			}
+
+			const result = getGeminiReasoning(options) as GeminiReasoningParams | undefined
+			expect(result).toEqual({ thinkingLevel: "high", includeThoughts: true })
+		})
+	})
+
 	describe("Integration scenarios", () => {
 		it("should handle model with requiredReasoningBudget across all providers", () => {
 			const modelWithRequired: ModelInfo = {
diff --git a/src/api/transform/reasoning.ts b/src/api/transform/reasoning.ts
@@ -116,24 +116,21 @@ export const getGeminiReasoning = ({
 		return { thinkingBudget: reasoningBudget!, includeThoughts: true }
 	}
 
-	// If reasoning effort shouldn't be used (toggle off, unsupported capability, etc.),
-	// do not send a thinkingConfig at all.
-	if (!shouldUseReasoningEffort({ model, settings })) {
-		return undefined
-	}
-
-	// Effort-based models on Google GenAI: only support explicit low/high levels.
+	// For effort-based Gemini models, use the selected effort directly (the user's
+	// setting, else the model's default). enableReasoningEffort is intentionally
+	// ignored so that a "low"/"high" effort (e.g. "High" chosen in the UI for
+	// gemini-3-pro-preview) yields a thinkingConfig regardless of legacy boolean flags.
 	const selectedEffort = (settings.reasoningEffort ?? model.reasoningEffort) as
 		| ReasoningEffortExtended
 		| "disable"
 		| undefined
 
-	// Respect “off” / unset semantics.
+	// Respect “off” / unset semantics from the effort selector itself.
 	if (!selectedEffort || selectedEffort === "disable") {
 		return undefined
 	}
 
-	// Only map "low" and "high" to thinkingLevel; ignore other values.
+	// Effort-based models on Google GenAI currently support only explicit low/high levels.
 	if (selectedEffort !== "low" && selectedEffort !== "high") {
 		return undefined
 	}