Skip to content

Commit e41fa1a

Browse files
fix: deepseek tool parsing fixed (#3557)
1 parent f4cd71f commit e41fa1a

File tree

3 files changed

+327
-108
lines changed

3 files changed

+327
-108
lines changed

lib/llm/tests/parallel_tool_call_integration.rs

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,3 +381,99 @@ async fn test_empty_tool_calls() {
381381
);
382382
assert_eq!(remaining_content.unwrap(), content_without_tools);
383383
}
384+
385+
#[tokio::test]
386+
async fn test_deepseek_v3_1_tool_call_parsing() {
387+
let response_content = r#"I'll help you understand this codebase. Let me start by exploring the structure and key
388+
files to provide you with a comprehensive
389+
explanation.<|tool▁calls▁begin|><|tool▁call▁begin|>TodoWrite<|tool▁sep|>{"todos":
390+
[{"content": "Explore the root directory structure", "status": "in_progress", "activeForm":
391+
"Exploring the root directory structure"}, {"content": "Examine package.json and
392+
configuration files", "status": "pending", "activeForm": "Examining package.json and
393+
configuration files"}, {"content": "Analyze source code structure and key modules",
394+
"status": "pending", "activeForm": "Analyzing source code structure and key modules"},
395+
{"content": "Identify main entry points and architectural patterns", "status": "pending",
396+
"activeForm": "Identifying main entry points and architectural patterns"}, {"content":
397+
"Summarize the codebase purpose and functionality", "status": "pending", "activeForm":
398+
"Summarizing the codebase purpose and
399+
functionality"}]}<|tool▁call▁end|><|tool▁calls▁end|>"#;
400+
401+
// Debug: Print the content
402+
println!("Response content: {}", response_content);
403+
println!(
404+
"Contains tool_calls_begin: {}",
405+
response_content.contains("<|tool▁calls▁begin|>")
406+
);
407+
println!(
408+
"Contains tool_call_begin: {}",
409+
response_content.contains("<|tool▁call▁begin|>")
410+
);
411+
412+
// Parse the tool calls using the deepseek_v3_1 parser
413+
let (tool_calls, remaining_content) =
414+
detect_and_parse_tool_call(response_content, Some("deepseek_v3_1"))
415+
.await
416+
.expect("Should successfully parse deepseek_v3_1 tool calls");
417+
418+
println!("Number of tool calls parsed: {}", tool_calls.len());
419+
if let Some(ref content) = remaining_content {
420+
println!("Remaining content: {}", content);
421+
}
422+
423+
// Validate we got exactly 1 tool call
424+
assert_eq!(tool_calls.len(), 1, "Should parse exactly 1 tool call");
425+
426+
// Validate remaining content (should be the explanatory text before the tool call)
427+
assert!(remaining_content.is_some());
428+
let remaining = remaining_content.unwrap();
429+
assert!(remaining.contains("I'll help you understand this codebase"));
430+
assert!(remaining.contains("comprehensive"));
431+
432+
// Validate the tool call
433+
let tool_call = &tool_calls[0];
434+
assert_eq!(tool_call.function.name, "TodoWrite");
435+
436+
// Validate OpenAI compatibility
437+
assert!(!tool_call.id.is_empty(), "Tool call should have an ID");
438+
assert_eq!(tool_call.tp, ToolCallType::Function);
439+
440+
// Parse and validate the arguments
441+
let args: serde_json::Value = serde_json::from_str(&tool_call.function.arguments)
442+
.expect("Arguments should be valid JSON");
443+
let args_obj = args.as_object().expect("Arguments should be an object");
444+
445+
// Check that todos array exists and has 5 items
446+
assert!(args_obj.contains_key("todos"), "Should have 'todos' key");
447+
let todos = args_obj
448+
.get("todos")
449+
.unwrap()
450+
.as_array()
451+
.expect("todos should be an array");
452+
assert_eq!(todos.len(), 5, "Should have exactly 5 todo items");
453+
454+
// Validate first todo item
455+
let first_todo = &todos[0];
456+
assert_eq!(
457+
first_todo.get("content").unwrap().as_str().unwrap(),
458+
"Explore the root directory structure"
459+
);
460+
assert_eq!(
461+
first_todo.get("status").unwrap().as_str().unwrap(),
462+
"in_progress"
463+
);
464+
assert_eq!(
465+
first_todo.get("activeForm").unwrap().as_str().unwrap(),
466+
"Exploring the root directory structure"
467+
);
468+
469+
// Validate last todo item
470+
let last_todo = &todos[4];
471+
assert_eq!(
472+
last_todo.get("content").unwrap().as_str().unwrap(),
473+
"Summarize the codebase purpose and functionality"
474+
);
475+
assert_eq!(
476+
last_todo.get("status").unwrap().as_str().unwrap(),
477+
"pending"
478+
);
479+
}

lib/parsers/src/tool_calling/config.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ pub struct JsonParserConfig {
2323
pub tool_call_start_tokens: Vec<String>,
2424
/// End token for individual tool calls (e.g., "</TOOLCALL>")
2525
pub tool_call_end_tokens: Vec<String>,
26+
/// Separator tokens between function name and arguments
27+
/// (e.g., "<|tool▁sep|>" for DeepSeek v3.1)
28+
/// Used by some models to separate function name from arguments
29+
pub tool_call_separator_tokens: Vec<String>,
2630
/// The key for the function name in the tool call
2731
/// i.e. `{"name": "function", "arguments": {...}}` it would be
2832
/// "name"
@@ -42,6 +46,7 @@ impl Default for JsonParserConfig {
4246
Self {
4347
tool_call_start_tokens: vec!["<TOOLCALL>".to_string(), "<|python_tag|>".to_string()],
4448
tool_call_end_tokens: vec!["</TOOLCALL>".to_string(), "".to_string()],
49+
tool_call_separator_tokens: vec![],
4550
function_name_keys: vec!["name".to_string()],
4651
arguments_keys: vec!["arguments".to_string(), "parameters".to_string()],
4752
parser_type: JsonParserType::Basic,
@@ -155,7 +160,11 @@ impl ToolCallConfig {
155160
"<|tool▁calls▁begin|>".to_string(),
156161
"<|tool▁call▁begin|>".to_string(),
157162
],
158-
tool_call_end_tokens: vec!["<|tool▁calls▁end|>".to_string()],
163+
tool_call_end_tokens: vec![
164+
"<|tool▁calls▁end|>".to_string(),
165+
"<|tool▁call▁end|>".to_string(),
166+
],
167+
tool_call_separator_tokens: vec!["<|tool▁sep|>".to_string()],
159168
parser_type: JsonParserType::DeepseekV31,
160169
..Default::default()
161170
},

0 commit comments

Comments
 (0)