Skip to content
This repository was archived by the owner on May 15, 2026. It is now read-only.
13 changes: 13 additions & 0 deletions packages/types/src/model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,19 @@ export const modelInfoSchema = z.object({
supportsReasoningBudget: z.boolean().optional(),
// Capability flag to indicate whether the model supports simple on/off binary reasoning
supportsReasoningBinary: z.boolean().optional(),
/**
* Capability flag to indicate whether the model supports interleaved thinking.
* When true, the model emits `reasoning_content` alongside `content` in responses.
* Examples: DeepSeek reasoner, Kimi K2 Thinking, Minimax M2.
*/
supportsInterleavedThinking: z.boolean().optional(),
/**
* Provider-specific parameters needed to enable interleaved thinking.
* Different providers may use different parameter formats.
* Example: DeepSeek uses `{ thinking: { type: "enabled" } }`.
* This parameter is passed via `extra_body` or a similar mechanism.
*/
interleavedThinkingParam: z.record(z.any()).optional(),
// Capability flag to indicate whether the model supports temperature parameter
supportsTemperature: z.boolean().optional(),
defaultTemperature: z.number().optional(),
Expand Down
4 changes: 4 additions & 0 deletions packages/types/src/providers/deepseek.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ export const deepSeekModels = {
supportsImages: false,
supportsPromptCache: true,
supportsNativeTools: true,
// Enables interleaved thinking mode (reasoning_content field)
supportsInterleavedThinking: true,
// Parameter passed via extra_body to enable thinking mode
interleavedThinkingParam: { thinking: { type: "enabled" } },
inputPrice: 0.56, // $0.56 per million tokens (cache miss) - Updated Sept 5, 2025
outputPrice: 1.68, // $1.68 per million tokens - Updated Sept 5, 2025
cacheWritesPrice: 0.56, // $0.56 per million tokens (cache miss) - Updated Sept 5, 2025
Expand Down
1 change: 1 addition & 0 deletions packages/types/src/providers/openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,7 @@ export const openAiModelInfoSaneDefaults: ModelInfo = {
inputPrice: 0,
outputPrice: 0,
supportsNativeTools: true,
supportsInterleavedThinking: false,
}

// https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation
Expand Down
11 changes: 10 additions & 1 deletion src/api/providers/__tests__/deepseek.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ vi.mock("openai", () => {
import OpenAI from "openai"
import type { Anthropic } from "@anthropic-ai/sdk"

import { deepSeekDefaultModelId } from "@roo-code/types"
import { deepSeekDefaultModelId, type ModelInfo } from "@roo-code/types"

import type { ApiHandlerOptions } from "../../../shared/api"

Expand Down Expand Up @@ -172,6 +172,15 @@ describe("DeepSeekHandler", () => {
expect(model.info.contextWindow).toBe(128_000)
expect(model.info.supportsImages).toBe(false)
expect(model.info.supportsPromptCache).toBe(true)
// Verify interleaved thinking capability flags
expect((model.info as ModelInfo).supportsInterleavedThinking).toBe(true)
expect((model.info as ModelInfo).interleavedThinkingParam).toEqual({ thinking: { type: "enabled" } })
})

// deepseek-chat must not advertise interleaved thinking: both capability fields stay unset.
// NOTE(review): presumably `handler` is constructed for the deepseek-chat model in the suite setup — confirm.
it("should not have interleaved thinking flags for deepseek-chat", () => {
	// Read the model info once; the cast exposes the optional capability fields to the checks below.
	const info = handler.getModel().info as ModelInfo
	expect(info.supportsInterleavedThinking).toBeUndefined()
	expect(info.interleavedThinkingParam).toBeUndefined()
})

it("should return provided model ID with default model info if model does not exist", () => {
Expand Down
144 changes: 126 additions & 18 deletions src/api/providers/openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ import { XmlMatcher } from "../../utils/xml-matcher"
import { convertToOpenAiMessages } from "../transform/openai-format"
import { convertToR1Format } from "../transform/r1-format"
import { convertToSimpleMessages } from "../transform/simple-format"
import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
import { isNewUserTurn } from "../transform/detect-turn-boundary"
import { ApiStream, ApiStreamUsageChunk, type ApiStreamToolCallPartialChunk } from "../transform/stream"
import { getModelParams } from "../transform/model-params"

import { DEFAULT_HEADERS } from "./constants"
Expand Down Expand Up @@ -85,13 +86,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
messages: Anthropic.Messages.MessageParam[],
metadata?: ApiHandlerCreateMessageMetadata,
): ApiStream {
const { info: modelInfo, reasoning } = this.getModel()
const { info: modelInfo, reasoning, temperature } = this.getModel()
const modelUrl = this.options.openAiBaseUrl ?? ""
const modelId = this.options.openAiModelId ?? ""
const enabledR1Format = this.options.openAiR1FormatEnabled ?? false
const enabledLegacyFormat = this.options.openAiLegacyFormat ?? false
const isAzureAiInference = this._isAzureAiInference(modelUrl)
const deepseekReasoner = modelId.includes("deepseek-reasoner") || enabledR1Format
const supportsInterleavedThinking = modelInfo?.supportsInterleavedThinking === true
const ark = modelUrl.includes(".volces.com")

if (modelId.includes("o1") || modelId.includes("o3") || modelId.includes("o4")) {
Expand All @@ -107,8 +108,16 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
if (this.options.openAiStreamingEnabled ?? true) {
let convertedMessages

if (deepseekReasoner) {
convertedMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
if (supportsInterleavedThinking) {
// For interleaved thinking models, conditionally clear reasoning_content:
// - Clear for new user turns (preserve only final answers)
// - Preserve during tool call sequences (required by API)
const allMessages: Anthropic.Messages.MessageParam[] = [
{ role: "user" as const, content: systemPrompt },
...messages,
]
const shouldClearReasoning = isNewUserTurn(allMessages)
convertedMessages = convertToR1Format(allMessages, shouldClearReasoning)
} else if (ark || enabledLegacyFormat) {
convertedMessages = [systemMessage, ...convertToSimpleMessages(messages)]
} else {
Expand Down Expand Up @@ -159,7 +168,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl

const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
model: modelId,
temperature: this.options.modelTemperature ?? (deepseekReasoner ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0),
temperature,
messages: convertedMessages,
stream: true as const,
...(isGrokXAI ? {} : { stream_options: { include_usage: true } }),
Expand All @@ -171,6 +180,12 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
}),
}

// Add interleaved thinking parameter if supported
if (supportsInterleavedThinking && modelInfo?.interleavedThinkingParam) {
// @ts-ignore-next-line - extra_body is not in the type definition but is supported by OpenAI API
requestOptions.extra_body = modelInfo.interleavedThinkingParam
}

// Add max_tokens if needed
this.addMaxTokensIfNeeded(requestOptions, modelInfo)

Expand All @@ -193,33 +208,86 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
}) as const,
)

// Accumulation state for interleaved thinking mode
// According to the API documentation for interleaved thinking, a chunk contains either reasoning_content OR content, never both.
// However, tool_calls may appear alongside either reasoning_content or content.
let reasoningAccumulator = ""
let isReasoningPhase = true
let hasEmittedReasoning = false

let lastUsage
let finalReasoningContent = ""
let finalContent = ""
let finalToolCalls: any[] = []

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These two variables (finalReasoningContent and finalContent) are declared but never used. The comment on line 266 mentions "Track tool calls for debug logging" but only finalToolCalls is actually used (to accumulate tool call data). Consider removing these unused variables to reduce dead code.

Suggested change
let finalReasoningContent = ""
let finalContent = ""
let finalToolCalls: any[] = []
let finalToolCalls: any[] = []

Fix it with Roo Code or mention @roomote and request a fix.

let toolCallBuffer: ApiStreamToolCallPartialChunk[] = []

for await (const chunk of stream) {
const delta = chunk.choices?.[0]?.delta ?? {}

// Handle reasoning_content accumulation (interleaved thinking mode)
if ("reasoning_content" in delta && delta.reasoning_content) {
reasoningAccumulator += (delta.reasoning_content as string | undefined) || ""
isReasoningPhase = true
// Note: Continue to process tool_calls and usage in same chunk if present
}

// Handle content - if we were in reasoning phase, emit accumulated reasoning first
if (delta.content) {
for (const chunk of matcher.update(delta.content)) {
yield chunk
// Transition from reasoning to content phase
if (isReasoningPhase && reasoningAccumulator && !hasEmittedReasoning) {
yield {
type: "reasoning",
text: reasoningAccumulator,
}
hasEmittedReasoning = true
reasoningAccumulator = ""
}
}

if ("reasoning_content" in delta && delta.reasoning_content) {
yield {
type: "reasoning",
text: (delta.reasoning_content as string | undefined) || "",
// Emit buffered tool calls before processing content
for (const toolCall of toolCallBuffer) {
yield toolCall
}
toolCallBuffer = []

isReasoningPhase = false

// Process content as usual
for (const chunk of matcher.update(delta.content)) {
yield chunk
}
}

// Handle tool calls (can occur during reasoning or content phase)
// Note: Reasoning may continue after tool calls, so we don't emit reasoning here
// Reasoning will be emitted when transitioning to content phase or at stream end
// Buffer tool calls instead of yielding immediately to ensure reasoning appears first
if (delta.tool_calls) {
for (const toolCall of delta.tool_calls) {
yield {
// Track tool calls for debug logging
if (toolCall.index !== undefined) {
if (!finalToolCalls[toolCall.index]) {
finalToolCalls[toolCall.index] = {
id: toolCall.id,
type: toolCall.type,
function: { name: toolCall.function?.name, arguments: "" },
}
}
if (toolCall.function?.name) {
finalToolCalls[toolCall.index].function.name = toolCall.function.name
}
if (toolCall.function?.arguments) {
finalToolCalls[toolCall.index].function.arguments += toolCall.function.arguments
}
}
// Buffer tool calls instead of yielding immediately
// Default index to 0 if undefined (required by type)
toolCallBuffer.push({
type: "tool_call_partial",
index: toolCall.index,
index: toolCall.index ?? 0,
id: toolCall.id,
name: toolCall.function?.name,
arguments: toolCall.function?.arguments,
}
})
}
}

Expand All @@ -228,6 +296,22 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
}
}

// Emit any remaining accumulated reasoning content at stream end
// This handles cases where stream ends during reasoning phase
if (reasoningAccumulator && !hasEmittedReasoning) {
yield {
type: "reasoning",
text: reasoningAccumulator,
}
}

// Emit any buffered tool calls after reasoning is emitted
// This ensures reasoning appears before tool calls in the UI
for (const toolCall of toolCallBuffer) {
yield toolCall
}
toolCallBuffer = []

for (const chunk of matcher.final()) {
yield chunk
}
Expand All @@ -238,8 +322,18 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
} else {
const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
model: modelId,
messages: deepseekReasoner
? convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
messages: supportsInterleavedThinking
? (() => {
// For interleaved thinking models, conditionally clear reasoning_content:
// - Clear for new user turns (preserve only final answers)
// - Preserve during tool call sequences (required by API)
const allMessages: Anthropic.Messages.MessageParam[] = [
{ role: "user" as const, content: systemPrompt },
...messages,
]
const shouldClearReasoning = isNewUserTurn(allMessages)
return convertToR1Format(allMessages, shouldClearReasoning)
})()
: enabledLegacyFormat
? [systemMessage, ...convertToSimpleMessages(messages)]
: [systemMessage, ...convertToOpenAiMessages(messages)],
Expand All @@ -250,6 +344,12 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
}),
}

// Add interleaved thinking parameter if supported
if (supportsInterleavedThinking && modelInfo?.interleavedThinkingParam) {
// @ts-ignore-next-line - extra_body is not in the type definition but is supported by OpenAI API
requestOptions.extra_body = modelInfo.interleavedThinkingParam
}

// Add max_tokens if needed
this.addMaxTokensIfNeeded(requestOptions, modelInfo)

Expand Down Expand Up @@ -278,6 +378,14 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
}
}

// Handle reasoning_content for interleaved thinking models
if (supportsInterleavedThinking && "reasoning_content" in message && message.reasoning_content) {
yield {
type: "reasoning",
text: (message.reasoning_content as string | undefined) || "",
}
}

yield {
type: "text",
text: message?.content || "",
Expand Down
Loading
Loading