diff --git a/lib/llm/tests/parallel_tool_call_integration.rs b/lib/llm/tests/parallel_tool_call_integration.rs index fee6bdfa82..7cb3cb134c 100644 --- a/lib/llm/tests/parallel_tool_call_integration.rs +++ b/lib/llm/tests/parallel_tool_call_integration.rs @@ -381,3 +381,99 @@ async fn test_empty_tool_calls() { ); assert_eq!(remaining_content.unwrap(), content_without_tools); } + +#[tokio::test] +async fn test_deepseek_v3_1_tool_call_parsing() { + let response_content = r#"I'll help you understand this codebase. Let me start by exploring the structure and key + files to provide you with a comprehensive + explanation.<|tool▁calls▁begin|><|tool▁call▁begin|>TodoWrite<|tool▁sep|>{"todos": + [{"content": "Explore the root directory structure", "status": "in_progress", "activeForm": + "Exploring the root directory structure"}, {"content": "Examine package.json and + configuration files", "status": "pending", "activeForm": "Examining package.json and + configuration files"}, {"content": "Analyze source code structure and key modules", + "status": "pending", "activeForm": "Analyzing source code structure and key modules"}, + {"content": "Identify main entry points and architectural patterns", "status": "pending", + "activeForm": "Identifying main entry points and architectural patterns"}, {"content": + "Summarize the codebase purpose and functionality", "status": "pending", "activeForm": + "Summarizing the codebase purpose and + functionality"}]}<|tool▁call▁end|><|tool▁calls▁end|>"#; + + // Debug: Print the content + println!("Response content: {}", response_content); + println!( + "Contains tool_calls_begin: {}", + response_content.contains("<|tool▁calls▁begin|>") + ); + println!( + "Contains tool_call_begin: {}", + response_content.contains("<|tool▁call▁begin|>") + ); + + // Parse the tool calls using the deepseek_v3_1 parser + let (tool_calls, remaining_content) = + detect_and_parse_tool_call(response_content, Some("deepseek_v3_1")) + .await + .expect("Should successfully parse deepseek_v3_1 tool calls"); + + println!("Number of tool calls parsed: {}", tool_calls.len()); + if let Some(ref content) = remaining_content { + println!("Remaining content: {}", content); + } + + // Validate we got exactly 1 tool call + assert_eq!(tool_calls.len(), 1, "Should parse exactly 1 tool call"); + + // Validate remaining content (should be the explanatory text before the tool call) + assert!(remaining_content.is_some()); + let remaining = remaining_content.unwrap(); + assert!(remaining.contains("I'll help you understand this codebase")); + assert!(remaining.contains("comprehensive")); + + // Validate the tool call + let tool_call = &tool_calls[0]; + assert_eq!(tool_call.function.name, "TodoWrite"); + + // Validate OpenAI compatibility + assert!(!tool_call.id.is_empty(), "Tool call should have an ID"); + assert_eq!(tool_call.tp, ToolCallType::Function); + + // Parse and validate the arguments + let args: serde_json::Value = serde_json::from_str(&tool_call.function.arguments) + .expect("Arguments should be valid JSON"); + let args_obj = args.as_object().expect("Arguments should be an object"); + + // Check that todos array exists and has 5 items + assert!(args_obj.contains_key("todos"), "Should have 'todos' key"); + let todos = args_obj + .get("todos") + .unwrap() + .as_array() + .expect("todos should be an array"); + assert_eq!(todos.len(), 5, "Should have exactly 5 todo items"); + + // Validate first todo item + let first_todo = &todos[0]; + assert_eq!( + first_todo.get("content").unwrap().as_str().unwrap(), + "Explore the root directory structure" + ); + assert_eq!( + first_todo.get("status").unwrap().as_str().unwrap(), + "in_progress" + ); + assert_eq!( + first_todo.get("activeForm").unwrap().as_str().unwrap(), + "Exploring the root directory structure" + ); + + // Validate last todo item + let last_todo = &todos[4]; + assert_eq!( + last_todo.get("content").unwrap().as_str().unwrap(), + "Summarize the codebase purpose and functionality" + ); + assert_eq!( + last_todo.get("status").unwrap().as_str().unwrap(), + "pending" + ); +} diff --git a/lib/parsers/src/tool_calling/config.rs b/lib/parsers/src/tool_calling/config.rs index 4673dcf9a5..57c3acb5e5 100644 --- a/lib/parsers/src/tool_calling/config.rs +++ b/lib/parsers/src/tool_calling/config.rs @@ -23,6 +23,10 @@ pub struct JsonParserConfig { pub tool_call_start_tokens: Vec, /// End token for individual tool calls (e.g., "") pub tool_call_end_tokens: Vec, + /// Separator tokens between function name and arguments + /// (e.g., "<|tool▁sep|>" for DeepSeek v3.1) + /// Used by some models to separate function name from arguments + pub tool_call_separator_tokens: Vec, /// The key for the function name in the tool call /// i.e. `{"name": "function", "arguments": {...}}` it would be /// "name" @@ -42,6 +46,7 @@ impl Default for JsonParserConfig { Self { tool_call_start_tokens: vec!["".to_string(), "<|python_tag|>".to_string()], tool_call_end_tokens: vec!["".to_string(), "".to_string()], + tool_call_separator_tokens: vec![], function_name_keys: vec!["name".to_string()], arguments_keys: vec!["arguments".to_string(), "parameters".to_string()], parser_type: JsonParserType::Basic, @@ -155,7 +160,11 @@ impl ToolCallConfig { "<|tool▁calls▁begin|>".to_string(), "<|tool▁call▁begin|>".to_string(), ], - tool_call_end_tokens: vec!["<|tool▁calls▁end|>".to_string()], + tool_call_end_tokens: vec![ + "<|tool▁calls▁end|>".to_string(), + "<|tool▁call▁end|>".to_string(), + ], + tool_call_separator_tokens: vec!["<|tool▁sep|>".to_string()], parser_type: JsonParserType::DeepseekV31, ..Default::default() }, diff --git a/lib/parsers/src/tool_calling/json/deepseek_parser.rs b/lib/parsers/src/tool_calling/json/deepseek_parser.rs index f469b4df2e..1d15ba86fc 100644 --- a/lib/parsers/src/tool_calling/json/deepseek_parser.rs +++ b/lib/parsers/src/tool_calling/json/deepseek_parser.rs @@ -1,30 +1,116 @@ // SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 -use regex::Regex; +use regex::RegexBuilder; use serde_json::Value; -use std::sync::OnceLock; +use uuid::Uuid; use super::config::JsonParserConfig; use super::response::{CalledFunction, ToolCallResponse, ToolCallType}; -static DEEPSEEK_V3_1_OUTER_REGEX: OnceLock = OnceLock::new(); -static DEEPSEEK_V3_1_INNER_REGEX: OnceLock = OnceLock::new(); +/// Extract individual tool call blocks from the input string. +/// Returns a list of strings, each representing one tool call block. +/// +/// DeepSeek format: <|tool▁call▁begin|>{name}<|tool▁sep|>{args}<|tool▁call▁end|> +/// +/// DeepSeek uses nested tokens: +/// - Wrapper tokens: <|tool▁calls▁begin|> ... <|tool▁calls▁end|> (wraps all tool calls) +/// - Individual tokens: <|tool▁call▁begin|> ... <|tool▁call▁end|> (individual call) +fn extract_tool_call_blocks( + input: &str, + start_tokens: &[String], + end_tokens: &[String], +) -> Vec { + let mut blocks = Vec::new(); + + // Filter tokens to find individual call markers (not the wrapper "calls" versions) + let individual_start_tokens: Vec<&String> = start_tokens + .iter() + .filter(|t| t.contains("tool_call_begin") || t.contains("tool▁call▁begin")) + .collect(); -pub fn get_deepseek_v3_1_outer_regex() -> &'static Regex { - DEEPSEEK_V3_1_OUTER_REGEX.get_or_init(|| { - // Outer regex: matches the entire tool call block - Regex::new(r"(?s)<|tool▁call▁begin|>.*?<|tool▁call▁end|>") - .expect("Failed to compile deepseek v3.1 outer regex pattern") - }) + let individual_end_tokens: Vec<&String> = end_tokens + .iter() + .filter(|t| t.contains("tool_call_end") || t.contains("tool▁call▁end")) + .collect(); + + // Try all combinations of individual start and end tokens + for start_token in individual_start_tokens.iter() { + for end_token in individual_end_tokens.iter() { + if start_token.is_empty() || end_token.is_empty() { + continue; + } + + // Build regex pattern with escaped tokens + let escaped_start = regex::escape(start_token); + let escaped_end = regex::escape(end_token); + let pattern = format!(r"{}(.*?){}", escaped_start, escaped_end); + + if let Ok(regex) = RegexBuilder::new(&pattern) + .dot_matches_new_line(true) + .build() + { + for capture in regex.captures_iter(input) { + if let Some(matched) = capture.get(1) { + // Don't trim the content - preserve whitespace for multiline JSON + let content = matched.as_str(); + if !content.trim().is_empty() { + blocks.push(content.to_string()); + } + } + } + + // If we found matches with this token pair, don't try other combinations + if !blocks.is_empty() { + return blocks; + } + } + } + } + + blocks } -pub fn get_deepseek_v3_1_inner_regex() -> &'static Regex { - DEEPSEEK_V3_1_INNER_REGEX.get_or_init(|| { - // Inner regex: captures function name and arguments between sep tokens - Regex::new(r"(?s)<|tool▁call▁begin|>(.*?)<|tool▁sep|>(.*?)<|tool▁call▁end|>") - .expect("Failed to compile deepseek v3.1 inner regex pattern") - }) +/// Parse a single tool call block that contains function name and arguments separated by a separator token. +/// +/// Format: {function_name}<|tool▁sep|>{json_arguments} +fn parse_single_tool_call(block: &str, separator_tokens: &[String]) -> Option<(String, Value)> { + // Try each separator token + for sep_token in separator_tokens.iter() { + if sep_token.is_empty() { + continue; + } + + if let Some((name_part, args_part)) = block.split_once(sep_token) { + let function_name = name_part.trim(); + let args_str = args_part.trim(); + + // Validate function name (should not be empty and should not contain JSON-like chars) + if function_name.is_empty() || function_name.contains(['{', '}', '[', ']']) { + continue; + } + + // Try to parse arguments as JSON + // First try parsing as-is + if let Ok(arguments) = serde_json::from_str::(args_str) { + return Some((function_name.to_string(), arguments)); + } + + // If that fails, try normalizing the JSON (handle multiline strings with unescaped newlines) + // This is a lenient approach for malformed JSON that may come from LLMs + let normalized = args_str + .lines() + .map(|line| line.trim_start()) + .collect::>() + .join(" "); + + if let Ok(arguments) = serde_json::from_str::(&normalized) { + return Some((function_name.to_string(), arguments)); + } + } + } + + None } pub fn parse_tool_calls_deepseek_v3_1( @@ -32,72 +118,81 @@ pub fn parse_tool_calls_deepseek_v3_1( config: &JsonParserConfig, ) -> anyhow::Result<(Vec, Option)> { // Format Structure: - // <|tool▁calls▁begin|><|tool▁call▁begin|>{function_name}<|tool▁sep|>{json_arguments}<|tool▁calls▁end|><|end▁of▁sentence|> + // <|tool▁calls▁begin|><|tool▁call▁begin|>{function_name}<|tool▁sep|>{json_arguments}<|tool▁call▁end|><|tool▁calls▁end|> let trimmed = message.trim(); + // Early exit if no content + if trimmed.is_empty() { + return Ok((vec![], Some(String::new()))); + } + let tool_call_start_tokens = &config.tool_call_start_tokens; + let tool_call_end_tokens = &config.tool_call_end_tokens; + let separator_tokens = &config.tool_call_separator_tokens; - // Early exit if no content or tool_call_start_tokens is empty - if trimmed.is_empty() || tool_call_start_tokens.is_empty() { + // Early exit if no tokens configured + if tool_call_start_tokens.is_empty() || separator_tokens.is_empty() { return Ok((vec![], Some(trimmed.to_string()))); } - // If tool call start token is not present then, no tool calls are there, return empty tool calls and the original trimmed string + // Check if tool call start token is present if !detect_tool_call_start_deepseek_v3_1(trimmed, config) { return Ok((vec![], Some(trimmed.to_string()))); } - let outer_re = get_deepseek_v3_1_outer_regex(); - let inner_re = get_deepseek_v3_1_inner_regex(); - - let outer_matches = outer_re.find_iter(trimmed); + // Extract normal text (content before the first wrapper start token) + // Look for wrapper tokens like <|tool▁calls▁begin|> (note: "calls" not "call") + let wrapper_tokens: Vec<&String> = tool_call_start_tokens + .iter() + .filter(|t| t.contains("tool_calls_begin") || t.contains("tool▁calls▁begin")) + .collect(); + + let normal_text = if !wrapper_tokens.is_empty() { + wrapper_tokens + .iter() + .find_map(|token| { + trimmed + .find(token.as_str()) + .map(|idx| trimmed[..idx].to_string()) + }) + .unwrap_or_else(String::new) + } else { + // Fallback to first individual call token if no wrapper found + tool_call_start_tokens + .iter() + .filter(|token| !token.is_empty()) + .find_map(|token| trimmed.find(token).map(|idx| trimmed[..idx].to_string())) + .unwrap_or_else(String::new) + }; + + // Extract individual tool call blocks + let blocks = extract_tool_call_blocks(trimmed, tool_call_start_tokens, tool_call_end_tokens); + + if blocks.is_empty() { + // Found start token but no valid blocks + return Ok((vec![], Some(trimmed.to_string()))); + } + // Parse each block to extract function name and arguments let mut tool_calls: Vec = Vec::new(); - let mut call_idx = 0usize; - // Two matches are there, first one using outer regex to extract multiple tool calls - // Second one using inner regex to extract the structure of the tool call - for outer_match in outer_matches { - for grp in inner_re.captures_iter(outer_match.as_str()) { - let Some(function_name) = grp.get(1).map(|x| x.as_str()) else { - continue; // Skip if function name is not found - }; - - let Some(arg_match) = grp.get(2) else { - continue; // Skip if arguments Match is not found. - }; - - let arguments = match serde_json::from_str::(arg_match.as_str()) { - Ok(args) => args, - Err(_) => { - continue; // Skip if arguments are not valid JSON - } - }; - - call_idx += 1; + for block in blocks { + if let Some((function_name, arguments)) = parse_single_tool_call(&block, separator_tokens) { tool_calls.push(ToolCallResponse { - id: format!("call-{}", call_idx), + id: format!("call-{}", Uuid::new_v4()), tp: ToolCallType::Function, function: CalledFunction { - name: function_name.to_string(), + name: function_name, arguments: serde_json::to_string(&arguments)?, }, }); } } - // Fast path: if no tool calls, just return early - // This may happen due to invalid json or any other parsing error reasons + // If no valid tool calls were parsed, return everything as normal text if tool_calls.is_empty() { return Ok((vec![], Some(trimmed.to_string()))); } - // Safety: We already checked above that tool_call_start_tokens.first() is Some - let start_token = tool_call_start_tokens.first().unwrap(); - let normal_text = trimmed - .split_once(start_token) - .map(|(before, _)| before.to_string()) - .unwrap_or_else(|| trimmed.to_string()); - Ok((tool_calls, Some(normal_text))) } @@ -139,6 +234,7 @@ pub fn detect_tool_call_start_deepseek_v3_1(chunk: &str, config: &JsonParserConf #[cfg(test)] mod tests { + use super::super::config::ToolCallConfig; use super::*; fn extract_name_and_args(call: ToolCallResponse) -> (String, serde_json::Value) { @@ -149,11 +245,7 @@ mod tests { #[test] fn test_parse_tool_calls_deepseek_v3_1_basic() { let text = r#"<|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather<|tool▁sep|>{"location": "Tokyo"}<|tool▁call▁end|><|tool▁call▁begin|>get_current_weather<|tool▁sep|>{"location": "Paris"}<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>"#; - let config = JsonParserConfig { - tool_call_start_tokens: vec!["<|tool▁calls▁begin|>".to_string()], - tool_call_end_tokens: vec!["<|tool▁calls▁end|>".to_string()], - ..Default::default() - }; + let config = ToolCallConfig::deepseek_v3_1().json; let (result, content) = parse_tool_calls_deepseek_v3_1(text, &config).unwrap(); assert_eq!(content, Some("".to_string())); assert_eq!(result.len(), 2); @@ -168,11 +260,7 @@ mod tests { #[test] fn test_parse_tool_calls_deepseek_v3_1_with_normal_text() { let text = r#"The following tool call retrieves weather information: <|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather<|tool▁sep|>{"location": "New York"}<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>"#; - let config = JsonParserConfig { - tool_call_start_tokens: vec!["<|tool▁calls▁begin|>".to_string()], - tool_call_end_tokens: vec!["<|tool▁calls▁end|>".to_string()], - ..Default::default() - }; + let config = ToolCallConfig::deepseek_v3_1().json; let (result, content) = parse_tool_calls_deepseek_v3_1(text, &config).unwrap(); assert_eq!( content, @@ -187,11 +275,7 @@ mod tests { #[test] fn test_parse_tool_calls_deepseek_v3_1_without_tool_call_start_token() { let text = r#"<|tool▁call▁begin|>get_current_weather宽带}{location": "Tokyo"}<|tool▁call▁end|><|tool▁calls▁end|>"#; - let config = JsonParserConfig { - tool_call_start_tokens: vec!["<|tool▁calls▁begin|>".to_string()], - tool_call_end_tokens: vec!["<|tool▁calls▁end|>".to_string()], - ..Default::default() - }; + let config = ToolCallConfig::deepseek_v3_1().json; let (result, content) = parse_tool_calls_deepseek_v3_1(text, &config).unwrap(); assert_eq!(content, Some(text.to_string())); assert_eq!(result.len(), 0); @@ -200,11 +284,7 @@ mod tests { #[test] fn test_parse_tool_calls_deepseek_v3_1_with_multi_tool_calls_with_multiple_args() { let text = r#"<|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather<|tool▁sep|>{"location": "Berlin", "units": "metric"}<|tool▁call▁end|><|tool▁call▁begin|>get_weather_forecast<|tool▁sep|>{"location": "Berlin", "days": 7, "units": "imperial"}<|tool▁call▁end|><|tool▁call▁begin|>get_air_quality<|tool▁sep|>{"location": "Berlin", "radius": 50}<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>"#; - let config = JsonParserConfig { - tool_call_start_tokens: vec!["<|tool▁calls▁begin|>".to_string()], - tool_call_end_tokens: vec!["<|tool▁calls▁end|>".to_string()], - ..Default::default() - }; + let config = ToolCallConfig::deepseek_v3_1().json; let (result, content) = parse_tool_calls_deepseek_v3_1(text, &config).unwrap(); assert_eq!(content, Some("".to_string())); assert_eq!(result.len(), 3); @@ -227,11 +307,7 @@ mod tests { fn test_parse_tool_calls_deepseek_v3_1_with_invalid_json() { // Everything is normal text in case of invalid json let text = r#"<|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather}{location": "Tokyo"}<|tool▁call▁end|><|tool▁calls▁end|>"#; - let config = JsonParserConfig { - tool_call_start_tokens: vec!["<|tool▁calls▁begin|>".to_string()], - tool_call_end_tokens: vec!["<|tool▁calls▁end|>".to_string()], - ..Default::default() - }; + let config = ToolCallConfig::deepseek_v3_1().json; let (result, content) = parse_tool_calls_deepseek_v3_1(text, &config).unwrap(); assert_eq!(content, Some(text.trim().to_string())); assert_eq!(result.len(), 0); @@ -241,28 +317,78 @@ mod tests { fn test_parse_tool_calls_deepseek_v3_1_with_multi_tool_calls_with_normal_text() { // Everything is normal text in case of invalid json let text = r#"The following tool calls retrieve weather information: <|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather宽带}{location": "Tokyo"}<|tool▁call▁end|><|tool▁call▁begin|>get_weather_forecast宽带}{location": "Berlin", "days": 7, "units": "imperial"}<|tool▁call▁end|><|tool▁call▁begin|>get_air_quality宽带}{location": "Berlin", "radius": 50}<|tool▁call▁end|><|tool▁calls▁end|>"#; - let config = JsonParserConfig { - tool_call_start_tokens: vec!["<|tool▁calls▁begin|>".to_string()], - tool_call_end_tokens: vec!["<|tool▁calls▁end|>".to_string()], - ..Default::default() - }; + let config = ToolCallConfig::deepseek_v3_1().json; let (result, content) = parse_tool_calls_deepseek_v3_1(text, &config).unwrap(); assert_eq!(content, Some(text.trim().to_string())); assert_eq!(result.len(), 0); } + + #[test] + fn test_parse_tool_calls_deepseek_v3_1_with_multiline_json() { + let text = r#"I'll help you understand this codebase. Let me start by exploring the structure and key + files to provide you with a comprehensive + explanation.<|tool▁calls▁begin|><|tool▁call▁begin|>TodoWrite<|tool▁sep|>{"todos": + [{"content": "Explore the root directory structure", "status": "in_progress", "activeForm": + "Exploring the root directory structure"}, {"content": "Examine package.json and + configuration files", "status": "pending", "activeForm": "Examining package.json and + configuration files"}, {"content": "Analyze source code structure and key modules", + "status": "pending", "activeForm": "Analyzing source code structure and key modules"}, + {"content": "Identify main entry points and architectural patterns", "status": "pending", + "activeForm": "Identifying main entry points and architectural patterns"}, {"content": + "Summarize the codebase purpose and functionality", "status": "pending", "activeForm": + "Summarizing the codebase purpose and + functionality"}]}<|tool▁call▁end|><|tool▁calls▁end|>"#; + let config = ToolCallConfig::deepseek_v3_1().json; + + let (tool_call_results, normal_content) = + parse_tool_calls_deepseek_v3_1(text, &config).unwrap(); + + assert_eq!(tool_call_results.len(), 1); + + let (name, args) = extract_name_and_args(tool_call_results[0].clone()); + assert_eq!(name, "TodoWrite"); + assert_eq!(tool_call_results[0].tp, ToolCallType::Function); + + let todos_array = args["todos"].as_array().unwrap(); + assert_eq!(todos_array.len(), 5); + + assert_eq!( + todos_array[0]["content"], + "Explore the root directory structure" + ); + assert_eq!(todos_array[0]["status"], "in_progress"); + assert_eq!( + todos_array[0]["activeForm"], + "Exploring the root directory structure" + ); + + assert_eq!( + todos_array[1]["content"], + "Examine package.json and configuration files" + ); + assert_eq!(todos_array[1]["status"], "pending"); + + assert_eq!( + todos_array[4]["content"], + "Summarize the codebase purpose and functionality" + ); + assert_eq!(todos_array[4]["status"], "pending"); + + assert_eq!( + normal_content, + Some("I'll help you understand this codebase. Let me start by exploring the structure and key\n files to provide you with a comprehensive\n explanation.".to_string()) + ); + } } #[cfg(test)] mod detect_parser_tests { + use super::super::config::ToolCallConfig; use super::*; #[test] fn test_detect_tool_call_start_deepseek_v3_1_chunk_with_tool_call_start_token() { let text = r#"<|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather宽带}"#; - let config = JsonParserConfig { - tool_call_start_tokens: vec!["<|tool▁calls▁begin|>".to_string()], - tool_call_end_tokens: vec!["<|tool▁calls▁end|>".to_string()], - ..Default::default() - }; + let config = ToolCallConfig::deepseek_v3_1().json; let result = detect_tool_call_start_deepseek_v3_1(text, &config); assert!(result); } @@ -270,23 +396,15 @@ mod detect_parser_tests { #[test] fn test_detect_tool_call_start_deepseek_v3_1_chunk_without_tool_call_start_token() { let text = r#"<|tool▁call▁begin|>get_current_weather宽带}"#; - let config = JsonParserConfig { - tool_call_start_tokens: vec!["<|tool▁calls▁begin|>".to_string()], - tool_call_end_tokens: vec!["<|tool▁calls▁end|>".to_string()], - ..Default::default() - }; + let config = ToolCallConfig::deepseek_v3_1().json; let result = detect_tool_call_start_deepseek_v3_1(text, &config); - assert!(!result); + assert!(result); } #[test] fn test_detect_tool_call_start_deepseek_v3_1_chunk_with_tool_call_start_token_in_middle() { let text = r#"The following tool calls retrieve weather information: <|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather宽带}"#; - let config = JsonParserConfig { - tool_call_start_tokens: vec!["<|tool▁calls▁begin|>".to_string()], - tool_call_end_tokens: vec!["<|tool▁calls▁end|>".to_string()], - ..Default::default() - }; + let config = ToolCallConfig::deepseek_v3_1().json; let result = detect_tool_call_start_deepseek_v3_1(text, &config); assert!(result); } @@ -294,11 +412,7 @@ mod detect_parser_tests { #[test] fn test_detect_tool_call_start_deepseek_v3_1_partial_tokens() { // Test partial token detection for streaming scenarios with unicode characters - let config = JsonParserConfig { - tool_call_start_tokens: vec!["<|tool▁calls▁begin|>".to_string()], - tool_call_end_tokens: vec!["<|tool▁calls▁end|>".to_string()], - ..Default::default() - }; + let config = ToolCallConfig::deepseek_v3_1().json; // Test various partial prefixes assert!(