6 changes: 5 additions & 1 deletion packages/types/src/providers/deepseek.ts
@@ -1,6 +1,9 @@
import type { ModelInfo } from "../model.js"

// https://platform.deepseek.com/docs/api
// preserveReasoning enables interleaved thinking mode for tool calls:
// DeepSeek requires reasoning_content to be passed back during tool call
// continuation within the same turn. See: https://api-docs.deepseek.com/guides/thinking_mode
export type DeepSeekModelId = keyof typeof deepSeekModels

export const deepSeekDefaultModelId: DeepSeekModelId = "deepseek-chat"
@@ -26,6 +29,7 @@ export const deepSeekModels = {
supportsPromptCache: true,
supportsNativeTools: true,
defaultToolProtocol: "native",
preserveReasoning: true,
inputPrice: 0.28, // $0.28 per million tokens (cache miss) - Updated Dec 9, 2025
outputPrice: 0.42, // $0.42 per million tokens - Updated Dec 9, 2025
cacheWritesPrice: 0.28, // $0.28 per million tokens (cache miss) - Updated Dec 9, 2025
@@ -35,4 +39,4 @@ export const deepSeekModels = {
} as const satisfies Record<string, ModelInfo>

// https://api-docs.deepseek.com/quick_start/parameter_settings
-export const DEEP_SEEK_DEFAULT_TEMPERATURE = 0
+export const DEEP_SEEK_DEFAULT_TEMPERATURE = 0.3
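
For context on the `preserveReasoning` comment above, a minimal sketch of the tool-call continuation it refers to is below, assuming an OpenAI-compatible client pointed at DeepSeek. The field names `reasoning_content`, `tool_calls`, and `thinking` appear in this PR and the linked DeepSeek docs; the client setup, tool schema, and tool result are illustrative assumptions, not part of the change.

```typescript
// Sketch of the same-turn tool-call continuation that preserveReasoning supports.
// reasoning_content / tool_calls / thinking come from this PR and the DeepSeek docs;
// everything else (client setup, tool schema, tool result) is assumed for the example.
import OpenAI from "openai"

const client = new OpenAI({
	baseURL: "https://api.deepseek.com",
	apiKey: process.env.DEEPSEEK_API_KEY,
})

const tools = [
	{
		type: "function" as const,
		function: {
			name: "get_weather",
			description: "Get the current weather",
			parameters: { type: "object", properties: { location: { type: "string" } } },
		},
	},
]

async function continueAfterToolCall() {
	const userMsg = { role: "user" as const, content: "What's the weather in SF?" }

	// First request: the reasoner thinks, then asks for a tool call.
	const first = await client.chat.completions.create({
		model: "deepseek-reasoner",
		messages: [userMsg],
		tools,
		thinking: { type: "enabled" }, // DeepSeek extension, hence the cast below
	} as any)

	const assistant = (first as any).choices[0].message

	// Continuation within the same turn: echo reasoning_content back alongside the
	// tool_calls so the model can keep reasoning after seeing the tool result.
	const second = await client.chat.completions.create({
		model: "deepseek-reasoner",
		messages: [
			userMsg,
			{
				role: "assistant",
				content: assistant.content,
				reasoning_content: assistant.reasoning_content, // preserved reasoning
				tool_calls: assistant.tool_calls,
			},
			{
				role: "tool",
				tool_call_id: assistant.tool_calls[0].id,
				content: '{"temperature_c": 18}',
			},
		],
		tools,
		thinking: { type: "enabled" },
	} as any)

	return (second as any).choices[0].message
}
```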
195 changes: 186 additions & 9 deletions src/api/providers/__tests__/deepseek.spec.ts
@@ -29,23 +29,75 @@ vi.mock("openai", () => {
}
}

// Check if this is a reasoning_content test by looking at model
const isReasonerModel = options.model?.includes("deepseek-reasoner")
const isToolCallTest = options.tools?.length > 0

// Return async iterator for streaming
return {
[Symbol.asyncIterator]: async function* () {
yield {
choices: [
{
delta: { content: "Test response" },
index: 0,
},
],
usage: null,
// For reasoner models, emit reasoning_content first
if (isReasonerModel) {
yield {
choices: [
{
delta: { reasoning_content: "Let me think about this..." },
index: 0,
},
],
usage: null,
}
yield {
choices: [
{
delta: { reasoning_content: " I'll analyze step by step." },
index: 0,
},
],
usage: null,
}
}

// For tool call tests with reasoner, emit tool call
if (isReasonerModel && isToolCallTest) {
yield {
choices: [
{
delta: {
tool_calls: [
{
index: 0,
id: "call_123",
function: {
name: "get_weather",
arguments: '{"location":"SF"}',
},
},
],
},
index: 0,
},
],
usage: null,
}
} else {
yield {
choices: [
{
delta: { content: "Test response" },
index: 0,
},
],
usage: null,
}
}

yield {
choices: [
{
delta: {},
index: 0,
finish_reason: isToolCallTest ? "tool_calls" : "stop",
},
],
usage: {
@@ -70,7 +122,7 @@
import OpenAI from "openai"
import type { Anthropic } from "@anthropic-ai/sdk"

-import { deepSeekDefaultModelId } from "@roo-code/types"
+import { deepSeekDefaultModelId, type ModelInfo } from "@roo-code/types"

import type { ApiHandlerOptions } from "../../../shared/api"

@@ -174,6 +226,27 @@ describe("DeepSeekHandler", () => {
expect(model.info.supportsPromptCache).toBe(true)
})

it("should have preserveReasoning enabled for deepseek-reasoner to support interleaved thinking", () => {
// This is critical for DeepSeek's interleaved thinking mode with tool calls.
// See: https://api-docs.deepseek.com/guides/thinking_mode
// The reasoning_content needs to be passed back during tool call continuation
// within the same turn for the model to continue reasoning properly.
const handlerWithReasoner = new DeepSeekHandler({
...mockOptions,
apiModelId: "deepseek-reasoner",
})
const model = handlerWithReasoner.getModel()
// Cast to ModelInfo to access preserveReasoning which is an optional property
expect((model.info as ModelInfo).preserveReasoning).toBe(true)
})

it("should NOT have preserveReasoning enabled for deepseek-chat", () => {
// deepseek-chat doesn't use thinking mode, so no need to preserve reasoning
const model = handler.getModel()
// Cast to ModelInfo to access preserveReasoning which is an optional property
expect((model.info as ModelInfo).preserveReasoning).toBeUndefined()
})

it("should return provided model ID with default model info if model does not exist", () => {
const handlerWithInvalidModel = new DeepSeekHandler({
...mockOptions,
@@ -317,4 +390,108 @@ describe("DeepSeekHandler", () => {
expect(result.cacheReadTokens).toBeUndefined()
})
})

describe("interleaved thinking mode", () => {
const systemPrompt = "You are a helpful assistant."
const messages: Anthropic.Messages.MessageParam[] = [
{
role: "user",
content: [
{
type: "text" as const,
text: "Hello!",
},
],
},
]

it("should handle reasoning_content in streaming responses for deepseek-reasoner", async () => {
const reasonerHandler = new DeepSeekHandler({
...mockOptions,
apiModelId: "deepseek-reasoner",
})

const stream = reasonerHandler.createMessage(systemPrompt, messages)
const chunks: any[] = []
for await (const chunk of stream) {
chunks.push(chunk)
}

// Should have reasoning chunks
const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning")
expect(reasoningChunks.length).toBeGreaterThan(0)
expect(reasoningChunks[0].text).toBe("Let me think about this...")
expect(reasoningChunks[1].text).toBe(" I'll analyze step by step.")
})

it("should pass thinking parameter for deepseek-reasoner model", async () => {
const reasonerHandler = new DeepSeekHandler({
...mockOptions,
apiModelId: "deepseek-reasoner",
})

const stream = reasonerHandler.createMessage(systemPrompt, messages)
for await (const _chunk of stream) {
// Consume the stream
}

// Verify that the thinking parameter was passed to the API
// Note: mockCreate receives two arguments - request options and path options
expect(mockCreate).toHaveBeenCalledWith(
expect.objectContaining({
thinking: { type: "enabled" },
}),
{}, // Empty path options for non-Azure URLs
)
})

it("should NOT pass thinking parameter for deepseek-chat model", async () => {
const chatHandler = new DeepSeekHandler({
...mockOptions,
apiModelId: "deepseek-chat",
})

const stream = chatHandler.createMessage(systemPrompt, messages)
for await (const _chunk of stream) {
// Consume the stream
}

// Verify that the thinking parameter was NOT passed to the API
const callArgs = mockCreate.mock.calls[0][0]
expect(callArgs.thinking).toBeUndefined()
})

it("should handle tool calls with reasoning_content", async () => {
const reasonerHandler = new DeepSeekHandler({
...mockOptions,
apiModelId: "deepseek-reasoner",
})

const tools: any[] = [
{
type: "function",
function: {
name: "get_weather",
description: "Get weather",
parameters: { type: "object", properties: {} },
},
},
]

const stream = reasonerHandler.createMessage(systemPrompt, messages, { taskId: "test", tools })
const chunks: any[] = []
for await (const chunk of stream) {
chunks.push(chunk)
}

// Should have reasoning chunks
const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning")
expect(reasoningChunks.length).toBeGreaterThan(0)

// Should have tool call chunks
const toolCallChunks = chunks.filter((chunk) => chunk.type === "tool_call_partial")
expect(toolCallChunks.length).toBeGreaterThan(0)
expect(toolCallChunks[0].name).toBe("get_weather")
})
})
})
113 changes: 110 additions & 3 deletions src/api/providers/deepseek.ts
@@ -1,11 +1,26 @@
-import { deepSeekModels, deepSeekDefaultModelId } from "@roo-code/types"
import { Anthropic } from "@anthropic-ai/sdk"
import OpenAI from "openai"

import {
deepSeekModels,
deepSeekDefaultModelId,
DEEP_SEEK_DEFAULT_TEMPERATURE,
OPENAI_AZURE_AI_INFERENCE_PATH,
} from "@roo-code/types"

import type { ApiHandlerOptions } from "../../shared/api"

-import type { ApiStreamUsageChunk } from "../transform/stream"
+import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
import { getModelParams } from "../transform/model-params"
import { convertToR1Format } from "../transform/r1-format"

import { OpenAiHandler } from "./openai"
import type { ApiHandlerCreateMessageMetadata } from "../index"

// Custom interface for DeepSeek params to support thinking mode
type DeepSeekChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParamsStreaming & {
thinking?: { type: "enabled" | "disabled" }
}

export class DeepSeekHandler extends OpenAiHandler {
constructor(options: ApiHandlerOptions) {
@@ -26,8 +41,100 @@ export class DeepSeekHandler extends OpenAiHandler {
return { id, info, ...params }
}

override async *createMessage(
systemPrompt: string,
messages: Anthropic.Messages.MessageParam[],
metadata?: ApiHandlerCreateMessageMetadata,
): ApiStream {
const modelId = this.options.apiModelId ?? deepSeekDefaultModelId
const { info: modelInfo } = this.getModel()

// Check if this is a thinking-enabled model (deepseek-reasoner)
const isThinkingModel = modelId.includes("deepseek-reasoner")

// Convert messages to R1 format (merges consecutive same-role messages)
// This is required for DeepSeek which does not support successive messages with the same role
const convertedMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])

const requestOptions: DeepSeekChatCompletionParams = {
model: modelId,
temperature: this.options.modelTemperature ?? DEEP_SEEK_DEFAULT_TEMPERATURE,
messages: convertedMessages,
stream: true as const,
stream_options: { include_usage: true },
// Enable thinking mode for deepseek-reasoner models
...(isThinkingModel && { thinking: { type: "enabled" } }),
...(metadata?.tools && { tools: this.convertToolsForOpenAI(metadata.tools) }),
...(metadata?.tool_choice && { tool_choice: metadata.tool_choice }),
...(metadata?.toolProtocol === "native" && {
parallel_tool_calls: metadata.parallelToolCalls ?? false,
}),
}

// Add max_tokens if needed
this.addMaxTokensIfNeeded(requestOptions, modelInfo)

// Check if base URL is Azure AI Inference (for DeepSeek via Azure)
const isAzureAiInference = this._isAzureAiInference(this.options.deepSeekBaseUrl)

let stream
try {
stream = await this.client.chat.completions.create(
requestOptions,
isAzureAiInference ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {},
)
} catch (error) {
const { handleOpenAIError } = await import("./utils/openai-error-handler")
throw handleOpenAIError(error, "DeepSeek")
}

let lastUsage

for await (const chunk of stream) {
const delta = chunk.choices?.[0]?.delta ?? {}

// Handle regular text content
if (delta.content) {
yield {
type: "text",
text: delta.content,
}
}

// Handle reasoning_content from DeepSeek's interleaved thinking
// This is how DeepSeek streams thinking content
if ("reasoning_content" in delta && delta.reasoning_content) {
yield {
type: "reasoning",
text: (delta.reasoning_content as string) || "",
}
}

// Handle tool calls
if (delta.tool_calls) {
for (const toolCall of delta.tool_calls) {
yield {
type: "tool_call_partial",
index: toolCall.index,
id: toolCall.id,
name: toolCall.function?.name,
arguments: toolCall.function?.arguments,
}
}
}

if (chunk.usage) {
lastUsage = chunk.usage
}
}

if (lastUsage) {
yield this.processUsageMetrics(lastUsage, modelInfo)
}
}

// Override to handle DeepSeek's usage metrics, including caching.
-protected override processUsageMetrics(usage: any): ApiStreamUsageChunk {
+protected override processUsageMetrics(usage: any, _modelInfo?: any): ApiStreamUsageChunk {
return {
type: "usage",
inputTokens: usage?.prompt_tokens || 0,
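
To show how the new `createMessage` override fits together, here is a rough sketch of a caller iterating the returned ApiStream. The chunk type names (`"text"`, `"reasoning"`, `"tool_call_partial"`, `"usage"`) come from the handler above; the handler options, import path, and surrounding wiring are assumptions mirroring the tests, not part of the PR.

```typescript
// Illustrative consumer of the ApiStream yielded by DeepSeekHandler.createMessage.
// Chunk type names come from the handler above; option names and the import path
// are assumptions for the sketch.
import { DeepSeekHandler } from "../deepseek"

async function runTurn() {
	const handler = new DeepSeekHandler({
		apiModelId: "deepseek-reasoner",
		deepSeekApiKey: "sk-...", // hypothetical option name; see ApiHandlerOptions
	} as any)

	const stream = handler.createMessage("You are a helpful assistant.", [
		{ role: "user", content: [{ type: "text", text: "What's the weather in SF?" }] },
	])

	let reasoning = ""
	let answer = ""
	const toolArgs: Record<number, string> = {}

	for await (const chunk of stream) {
		switch (chunk.type) {
			case "reasoning":
				// Interleaved thinking, streamed before/around tool calls.
				reasoning += chunk.text
				break
			case "text":
				answer += chunk.text
				break
			case "tool_call_partial":
				// Partial tool-call deltas arrive keyed by index; the caller concatenates them.
				toolArgs[chunk.index] = (toolArgs[chunk.index] ?? "") + (chunk.arguments ?? "")
				break
			case "usage":
				console.log("tokens:", chunk.inputTokens, chunk.outputTokens)
				break
		}
	}

	return { reasoning, answer, toolArgs }
}
```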