6 changes: 5 additions & 1 deletion packages/types/src/providers/deepseek.ts
@@ -1,6 +1,9 @@
import type { ModelInfo } from "../model.js"

// https://platform.deepseek.com/docs/api
// preserveReasoning enables interleaved thinking mode for tool calls:
// DeepSeek requires reasoning_content to be passed back during tool call
// continuation within the same turn. See: https://api-docs.deepseek.com/guides/thinking_mode
export type DeepSeekModelId = keyof typeof deepSeekModels

export const deepSeekDefaultModelId: DeepSeekModelId = "deepseek-chat"
@@ -26,6 +29,7 @@ export const deepSeekModels = {
supportsPromptCache: true,
supportsNativeTools: true,
defaultToolProtocol: "native",
preserveReasoning: true,
inputPrice: 0.28, // $0.28 per million tokens (cache miss) - Updated Dec 9, 2025
outputPrice: 0.42, // $0.42 per million tokens - Updated Dec 9, 2025
cacheWritesPrice: 0.28, // $0.28 per million tokens (cache miss) - Updated Dec 9, 2025
@@ -35,4 +39,4 @@ export const deepSeekModels = {
} as const satisfies Record<string, ModelInfo>

// https://api-docs.deepseek.com/quick_start/parameter_settings
-export const DEEP_SEEK_DEFAULT_TEMPERATURE = 0
+export const DEEP_SEEK_DEFAULT_TEMPERATURE = 0.3
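
For context on the `preserveReasoning` comment above, a minimal sketch of the tool-call continuation it refers to is below, assuming an OpenAI-compatible client pointed at DeepSeek. The field names `reasoning_content`, `tool_calls`, and `thinking` appear in this PR and the linked DeepSeek docs; the client setup, tool schema, and tool result are illustrative assumptions, not part of the change.

```typescript
// Sketch of the same-turn tool-call continuation that preserveReasoning supports.
// reasoning_content / tool_calls / thinking come from this PR and the DeepSeek docs;
// everything else (client setup, tool schema, tool result) is assumed for the example.
import OpenAI from "openai"

const client = new OpenAI({
	baseURL: "https://api.deepseek.com",
	apiKey: process.env.DEEPSEEK_API_KEY,
})

const tools = [
	{
		type: "function" as const,
		function: {
			name: "get_weather",
			description: "Get the current weather",
			parameters: { type: "object", properties: { location: { type: "string" } } },
		},
	},
]

async function continueAfterToolCall() {
	const userMsg = { role: "user" as const, content: "What's the weather in SF?" }

	// First request: the reasoner thinks, then asks for a tool call.
	const first = await client.chat.completions.create({
		model: "deepseek-reasoner",
		messages: [userMsg],
		tools,
		thinking: { type: "enabled" }, // DeepSeek extension, hence the cast below
	} as any)

	const assistant = (first as any).choices[0].message

	// Continuation within the same turn: echo reasoning_content back alongside the
	// tool_calls so the model can keep reasoning after seeing the tool result.
	const second = await client.chat.completions.create({
		model: "deepseek-reasoner",
		messages: [
			userMsg,
			{
				role: "assistant",
				content: assistant.content,
				reasoning_content: assistant.reasoning_content, // preserved reasoning
				tool_calls: assistant.tool_calls,
			},
			{
				role: "tool",
				tool_call_id: assistant.tool_calls[0].id,
				content: '{"temperature_c": 18}',
			},
		],
		tools,
		thinking: { type: "enabled" },
	} as any)

	return (second as any).choices[0].message
}
```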
195 changes: 186 additions & 9 deletions src/api/providers/__tests__/deepseek.spec.ts
@@ -29,23 +29,75 @@ vi.mock("openai", () => {
}
}

// Check if this is a reasoning_content test by looking at model
const isReasonerModel = options.model?.includes("deepseek-reasoner")
const isToolCallTest = options.tools?.length > 0

// Return async iterator for streaming
return {
[Symbol.asyncIterator]: async function* () {
yield {
choices: [
{
delta: { content: "Test response" },
index: 0,
},
],
usage: null,
// For reasoner models, emit reasoning_content first
if (isReasonerModel) {
yield {
choices: [
{
delta: { reasoning_content: "Let me think about this..." },
index: 0,
},
],
usage: null,
}
yield {
choices: [
{
delta: { reasoning_content: " I'll analyze step by step." },
index: 0,
},
],
usage: null,
}
}

// For tool call tests with reasoner, emit tool call
if (isReasonerModel && isToolCallTest) {
yield {
choices: [
{
delta: {
tool_calls: [
{
index: 0,
id: "call_123",
function: {
name: "get_weather",
arguments: '{"location":"SF"}',
},
},
],
},
index: 0,
},
],
usage: null,
}
} else {
yield {
choices: [
{
delta: { content: "Test response" },
index: 0,
},
],
usage: null,
}
}

yield {
choices: [
{
delta: {},
index: 0,
finish_reason: isToolCallTest ? "tool_calls" : "stop",
},
],
usage: {
@@ -70,7 +122,7 @@
import OpenAI from "openai"
import type { Anthropic } from "@anthropic-ai/sdk"

-import { deepSeekDefaultModelId } from "@roo-code/types"
+import { deepSeekDefaultModelId, type ModelInfo } from "@roo-code/types"

import type { ApiHandlerOptions } from "../../../shared/api"

@@ -174,6 +226,27 @@ describe("DeepSeekHandler", () => {
expect(model.info.supportsPromptCache).toBe(true)
})

it("should have preserveReasoning enabled for deepseek-reasoner to support interleaved thinking", () => {
// This is critical for DeepSeek's interleaved thinking mode with tool calls.
// See: https://api-docs.deepseek.com/guides/thinking_mode
// The reasoning_content needs to be passed back during tool call continuation
// within the same turn for the model to continue reasoning properly.
const handlerWithReasoner = new DeepSeekHandler({
...mockOptions,
apiModelId: "deepseek-reasoner",
})
const model = handlerWithReasoner.getModel()
// Cast to ModelInfo to access preserveReasoning which is an optional property
expect((model.info as ModelInfo).preserveReasoning).toBe(true)
})

it("should NOT have preserveReasoning enabled for deepseek-chat", () => {
// deepseek-chat doesn't use thinking mode, so no need to preserve reasoning
const model = handler.getModel()
// Cast to ModelInfo to access preserveReasoning which is an optional property
expect((model.info as ModelInfo).preserveReasoning).toBeUndefined()
})

it("should return provided model ID with default model info if model does not exist", () => {
const handlerWithInvalidModel = new DeepSeekHandler({
...mockOptions,
@@ -317,4 +390,108 @@ describe("DeepSeekHandler", () => {
expect(result.cacheReadTokens).toBeUndefined()
})
})

describe("interleaved thinking mode", () => {
const systemPrompt = "You are a helpful assistant."
const messages: Anthropic.Messages.MessageParam[] = [
{
role: "user",
content: [
{
type: "text" as const,
text: "Hello!",
},
],
},
]

it("should handle reasoning_content in streaming responses for deepseek-reasoner", async () => {
const reasonerHandler = new DeepSeekHandler({
...mockOptions,
apiModelId: "deepseek-reasoner",
})

const stream = reasonerHandler.createMessage(systemPrompt, messages)
const chunks: any[] = []
for await (const chunk of stream) {
chunks.push(chunk)
}

// Should have reasoning chunks
const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning")
expect(reasoningChunks.length).toBeGreaterThan(0)
expect(reasoningChunks[0].text).toBe("Let me think about this...")
expect(reasoningChunks[1].text).toBe(" I'll analyze step by step.")
})

it("should pass thinking parameter for deepseek-reasoner model", async () => {
const reasonerHandler = new DeepSeekHandler({
...mockOptions,
apiModelId: "deepseek-reasoner",
})

const stream = reasonerHandler.createMessage(systemPrompt, messages)
for await (const _chunk of stream) {
// Consume the stream
}

// Verify that the thinking parameter was passed to the API
// Note: mockCreate receives two arguments - request options and path options
expect(mockCreate).toHaveBeenCalledWith(
expect.objectContaining({
thinking: { type: "enabled" },
}),
{}, // Empty path options for non-Azure URLs
)
})

it("should NOT pass thinking parameter for deepseek-chat model", async () => {
const chatHandler = new DeepSeekHandler({
...mockOptions,
apiModelId: "deepseek-chat",
})

const stream = chatHandler.createMessage(systemPrompt, messages)
for await (const _chunk of stream) {
// Consume the stream
}

// Verify that the thinking parameter was NOT passed to the API
const callArgs = mockCreate.mock.calls[0][0]
expect(callArgs.thinking).toBeUndefined()
})

it("should handle tool calls with reasoning_content", async () => {
const reasonerHandler = new DeepSeekHandler({
...mockOptions,
apiModelId: "deepseek-reasoner",
})

const tools: any[] = [
{
type: "function",
function: {
name: "get_weather",
description: "Get weather",
parameters: { type: "object", properties: {} },
},
},
]

const stream = reasonerHandler.createMessage(systemPrompt, messages, { taskId: "test", tools })
const chunks: any[] = []
for await (const chunk of stream) {
chunks.push(chunk)
}

// Should have reasoning chunks
const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning")
expect(reasoningChunks.length).toBeGreaterThan(0)

// Should have tool call chunks
const toolCallChunks = chunks.filter((chunk) => chunk.type === "tool_call_partial")
expect(toolCallChunks.length).toBeGreaterThan(0)
expect(toolCallChunks[0].name).toBe("get_weather")
})
})
})
113 changes: 110 additions & 3 deletions src/api/providers/deepseek.ts
@@ -1,11 +1,26 @@
-import { deepSeekModels, deepSeekDefaultModelId } from "@roo-code/types"
import { Anthropic } from "@anthropic-ai/sdk"
import OpenAI from "openai"

import {
deepSeekModels,
deepSeekDefaultModelId,
DEEP_SEEK_DEFAULT_TEMPERATURE,
OPENAI_AZURE_AI_INFERENCE_PATH,
} from "@roo-code/types"

import type { ApiHandlerOptions } from "../../shared/api"

-import type { ApiStreamUsageChunk } from "../transform/stream"
+import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
import { getModelParams } from "../transform/model-params"
import { convertToR1Format } from "../transform/r1-format"

import { OpenAiHandler } from "./openai"
import type { ApiHandlerCreateMessageMetadata } from "../index"

// Custom interface for DeepSeek params to support thinking mode
type DeepSeekChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParamsStreaming & {
thinking?: { type: "enabled" | "disabled" }
}

export class DeepSeekHandler extends OpenAiHandler {
constructor(options: ApiHandlerOptions) {
@@ -26,8 +41,100 @@ export class DeepSeekHandler extends OpenAiHandler {
return { id, info, ...params }
}

override async *createMessage(
systemPrompt: string,
messages: Anthropic.Messages.MessageParam[],
metadata?: ApiHandlerCreateMessageMetadata,
): ApiStream {
const modelId = this.options.apiModelId ?? deepSeekDefaultModelId
const { info: modelInfo } = this.getModel()

// Check if this is a thinking-enabled model (deepseek-reasoner)
const isThinkingModel = modelId.includes("deepseek-reasoner")

// Convert messages to R1 format (merges consecutive same-role messages)
// This is required for DeepSeek which does not support successive messages with the same role
const convertedMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])

const requestOptions: DeepSeekChatCompletionParams = {
model: modelId,
temperature: this.options.modelTemperature ?? DEEP_SEEK_DEFAULT_TEMPERATURE,
messages: convertedMessages,
stream: true as const,
stream_options: { include_usage: true },
// Enable thinking mode for deepseek-reasoner models
...(isThinkingModel && { thinking: { type: "enabled" } }),
...(metadata?.tools && { tools: this.convertToolsForOpenAI(metadata.tools) }),
...(metadata?.tool_choice && { tool_choice: metadata.tool_choice }),
...(metadata?.toolProtocol === "native" && {
parallel_tool_calls: metadata.parallelToolCalls ?? false,
}),
}

// Add max_tokens if needed
this.addMaxTokensIfNeeded(requestOptions, modelInfo)

// Check if base URL is Azure AI Inference (for DeepSeek via Azure)
const isAzureAiInference = this._isAzureAiInference(this.options.deepSeekBaseUrl)

let stream
try {
stream = await this.client.chat.completions.create(
requestOptions,
isAzureAiInference ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {},
)
} catch (error) {
const { handleOpenAIError } = await import("./utils/openai-error-handler")
throw handleOpenAIError(error, "DeepSeek")
}

let lastUsage

for await (const chunk of stream) {
const delta = chunk.choices?.[0]?.delta ?? {}

// Handle regular text content
if (delta.content) {
yield {
type: "text",
text: delta.content,
}
}

// Handle reasoning_content from DeepSeek's interleaved thinking
// This is how DeepSeek streams thinking content
if ("reasoning_content" in delta && delta.reasoning_content) {
yield {
type: "reasoning",
text: (delta.reasoning_content as string) || "",
}
}

// Handle tool calls
if (delta.tool_calls) {
for (const toolCall of delta.tool_calls) {
yield {
type: "tool_call_partial",
index: toolCall.index,
id: toolCall.id,
name: toolCall.function?.name,
arguments: toolCall.function?.arguments,
}
}
}

if (chunk.usage) {
lastUsage = chunk.usage
}
}

if (lastUsage) {
yield this.processUsageMetrics(lastUsage, modelInfo)
}
}

// Override to handle DeepSeek's usage metrics, including caching.
-protected override processUsageMetrics(usage: any): ApiStreamUsageChunk {
+protected override processUsageMetrics(usage: any, _modelInfo?: any): ApiStreamUsageChunk {
return {
type: "usage",
inputTokens: usage?.prompt_tokens || 0,
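
To show how the new `createMessage` override fits together, here is a rough sketch of a caller iterating the returned ApiStream. The chunk type names (`"text"`, `"reasoning"`, `"tool_call_partial"`, `"usage"`) come from the handler above; the handler options, import path, and surrounding wiring are assumptions mirroring the tests, not part of the PR.

```typescript
// Illustrative consumer of the ApiStream yielded by DeepSeekHandler.createMessage.
// Chunk type names come from the handler above; option names and the import path
// are assumptions for the sketch.
import { DeepSeekHandler } from "../deepseek"

async function runTurn() {
	const handler = new DeepSeekHandler({
		apiModelId: "deepseek-reasoner",
		deepSeekApiKey: "sk-...", // hypothetical option name; see ApiHandlerOptions
	} as any)

	const stream = handler.createMessage("You are a helpful assistant.", [
		{ role: "user", content: [{ type: "text", text: "What's the weather in SF?" }] },
	])

	let reasoning = ""
	let answer = ""
	const toolArgs: Record<number, string> = {}

	for await (const chunk of stream) {
		switch (chunk.type) {
			case "reasoning":
				// Interleaved thinking, streamed before/around tool calls.
				reasoning += chunk.text
				break
			case "text":
				answer += chunk.text
				break
			case "tool_call_partial":
				// Partial tool-call deltas arrive keyed by index; the caller concatenates them.
				toolArgs[chunk.index] = (toolArgs[chunk.index] ?? "") + (chunk.arguments ?? "")
				break
			case "usage":
				console.log("tokens:", chunk.inputTokens, chunk.outputTokens)
				break
		}
	}

	return { reasoning, answer, toolArgs }
}
```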