From 5f5c00ffc14b1f07f9500881861cf3153b737087 Mon Sep 17 00:00:00 2001
From: Roo Code <roomote@roocode.com>
Date: Fri, 19 Dec 2025 12:54:25 +0000
Subject: [PATCH] fix: handle empty response with reasoning in Requesty
 provider

Add a safeguard that emits placeholder text when the model returns only
reasoning/thinking content without any actual text or tool calls.
This prevents "empty assistant response" errors, which can occur
when a model writes malformed tool-call syntax into its reasoning.

Includes tests for reasoning-only response handling.
---
 src/api/providers/__tests__/requesty.spec.ts | 120 +++++++++++++++++++
 src/api/providers/requesty.ts                |  16 +++
 2 files changed, 136 insertions(+)

diff --git a/src/api/providers/__tests__/requesty.spec.ts b/src/api/providers/__tests__/requesty.spec.ts
index df799426a72..bca440de1eb 100644
--- a/src/api/providers/__tests__/requesty.spec.ts
+++ b/src/api/providers/__tests__/requesty.spec.ts
@@ -2,6 +2,7 @@
 
 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
+import { t } from "i18next"
 
 import { TOOL_PROTOCOL } from "@roo-code/types"
 
@@ -378,6 +379,125 @@ describe("RequestyHandler", () => {
 				})
 			})
 		})
+
+		describe("reasoning-only response handling", () => {
+			it("should emit placeholder text when model returns only reasoning content", async () => {
+				const handler = new RequestyHandler(mockOptions)
+
+				// Stream fixture: two reasoning_content deltas, then an empty delta carrying usage (no text, no tool calls)
+				const mockStreamWithOnlyReasoning = {
+					async *[Symbol.asyncIterator]() {
+						yield {
+							id: "test-id",
+							choices: [
+								{
+									delta: {
+										reasoning_content: "I am thinking about how to respond...",
+									},
+								},
+							],
+						}
+						yield {
+							id: "test-id",
+							choices: [
+								{
+									delta: {
+										reasoning_content:
+											"The user wants me to use a tool, but I'll format it wrong: <tool_call><function=get_weather>",
+									},
+								},
+							],
+						}
+						yield {
+							id: "test-id",
+							choices: [{ delta: {} }],
+							usage: { prompt_tokens: 10, completion_tokens: 20 },
+						}
+					},
+				}
+				mockCreate.mockResolvedValue(mockStreamWithOnlyReasoning)
+
+				const systemPrompt = "test system prompt"
+				const messages: Anthropic.Messages.MessageParam[] = [{ role: "user" as const, content: "test message" }]
+
+				const chunks = []
+				for await (const chunk of handler.createMessage(systemPrompt, messages)) {
+					chunks.push(chunk)
+				}
+
+				// Expected order: both reasoning chunks, then the fallback text chunk, then the usage chunk
+				expect(chunks).toHaveLength(4)
+				expect(chunks[0]).toEqual({ type: "reasoning", text: "I am thinking about how to respond..." })
+				expect(chunks[1]).toEqual({
+					type: "reasoning",
+					text: "The user wants me to use a tool, but I'll format it wrong: <tool_call><function=get_weather>",
+				})
+				// Placeholder text chunk: emitted because the stream carried reasoning but no text or tool calls
+				expect(chunks[2]).toEqual({
+					type: "text",
+					text: t("common:errors.gemini.thinking_complete_no_output"),
+				})
+				expect(chunks[3]).toMatchObject({
+					type: "usage",
+					inputTokens: 10,
+					outputTokens: 20,
+				})
+			})
+
+			it("should not emit placeholder when model returns actual content", async () => {
+				const handler = new RequestyHandler(mockOptions)
+
+				// Stream fixture: one reasoning_content delta, one content delta, then an empty delta carrying usage
+				const mockStreamWithContent = {
+					async *[Symbol.asyncIterator]() {
+						yield {
+							id: "test-id",
+							choices: [
+								{
+									delta: {
+										reasoning_content: "Thinking...",
+									},
+								},
+							],
+						}
+						yield {
+							id: "test-id",
+							choices: [
+								{
+									delta: {
+										content: "Here is my actual response",
+									},
+								},
+							],
+						}
+						yield {
+							id: "test-id",
+							choices: [{ delta: {} }],
+							usage: { prompt_tokens: 10, completion_tokens: 20 },
+						}
+					},
+				}
+				mockCreate.mockResolvedValue(mockStreamWithContent)
+
+				const systemPrompt = "test system prompt"
+				const messages: Anthropic.Messages.MessageParam[] = [{ role: "user" as const, content: "test message" }]
+
+				const chunks = []
+				for await (const chunk of handler.createMessage(systemPrompt, messages)) {
+					chunks.push(chunk)
+				}
+
+				// Expected order: reasoning, the model's own text, then usage; exactly three chunks, so no placeholder
+				expect(chunks).toHaveLength(3)
+				expect(chunks[0]).toEqual({ type: "reasoning", text: "Thinking..." })
+				expect(chunks[1]).toEqual({ type: "text", text: "Here is my actual response" })
+				expect(chunks[2]).toMatchObject({
+					type: "usage",
+					inputTokens: 10,
+					outputTokens: 20,
+				})
+			})
+		})
 	})
 
 	describe("completePrompt", () => {
diff --git a/src/api/providers/requesty.ts b/src/api/providers/requesty.ts
index b84a36bcc16..b28f64d1d70 100644
--- a/src/api/providers/requesty.ts
+++ b/src/api/providers/requesty.ts
@@ -1,5 +1,6 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
+import { t } from "i18next"
 
 import { type ModelInfo, requestyDefaultModelId, requestyDefaultModelInfo, TOOL_PROTOCOL } from "@roo-code/types"
 
@@ -166,19 +167,26 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan
 		}
 		let lastUsage: any = undefined
 
+		// Track whether we've received actual content vs just reasoning
+		let hasContent = false
+		let hasReasoning = false
+
 		for await (const chunk of stream) {
 			const delta = chunk.choices[0]?.delta
 
 			if (delta?.content) {
+				hasContent = true
 				yield { type: "text", text: delta.content }
 			}
 
 			if (delta && "reasoning_content" in delta && delta.reasoning_content) {
+				hasReasoning = true
 				yield { type: "reasoning", text: (delta.reasoning_content as string | undefined) || "" }
 			}
 
 			// Handle native tool calls
 			if (delta && "tool_calls" in delta && Array.isArray(delta.tool_calls)) {
+				hasContent = true
 				for (const toolCall of delta.tool_calls) {
 					yield {
 						type: "tool_call_partial",
@@ -195,6 +203,14 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan
 			}
 		}
 
+		// If model produced reasoning but no actual content (text or tool calls),
+		// emit a placeholder to prevent "empty assistant response" errors.
+		// This can happen when models output malformed tool call syntax in their
+		// reasoning/thinking content (e.g., <tool_call><function=...> tags).
+		if (hasReasoning && !hasContent) {
+			yield { type: "text", text: t("common:errors.gemini.thinking_complete_no_output") }
+		}
+
 		if (lastUsage) {
 			yield this.processUsageMetrics(lastUsage, info)
 		}