Skip to content

Commit b929e13

Browse files
authored
chore: added utility to detect possible tool call start for a chunk (#2923)
Signed-off-by: ayushag <[email protected]>
1 parent 5ea6b8d commit b929e13

File tree

8 files changed

+370
-18
lines changed

8 files changed

+370
-18
lines changed

lib/parsers/src/tool_calling/harmony/harmony_parser.rs

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,7 @@ pub fn parse_tool_calls_harmony(
2929
// Check if tool call start tokens are present, if not return everything as normal text
3030
// Start Token: "<|start|>assistant<|channel|>commentary" should be present in the text if tool calls are present
3131
// End Token: "<|call|>"
32-
if !config
33-
.tool_call_start_tokens
34-
.iter()
35-
.any(|token| trimmed.contains(token))
36-
{
32+
if !detect_tool_call_start_harmony(text, config) {
3733
return Ok((vec![], Some(trimmed)));
3834
}
3935

@@ -158,6 +154,17 @@ pub fn parse_tool_calls_harmony(
158154
Ok((res, Some(normal_text.to_string())))
159155
}
160156

157+
pub fn detect_tool_call_start_harmony(chunk: &str, config: &JsonParserConfig) -> bool {
158+
let trimmed = chunk.trim();
159+
if trimmed.is_empty() {
160+
return false;
161+
}
162+
config
163+
.tool_call_start_tokens
164+
.iter()
165+
.any(|token| trimmed.contains(token))
166+
}
167+
161168
#[cfg(test)]
162169
mod tests {
163170
use super::*;
@@ -270,3 +277,32 @@ mod tests {
270277
assert_eq!(args["unit"], "celsius");
271278
}
272279
}
280+
281+
#[cfg(test)]
mod detect_parser_tests {
    use super::*;

    // Tokens shared by every test in this module.
    const START_TOKEN: &str = "<|start|>assistant<|channel|>commentary";
    const END_TOKEN: &str = "<|call|>";

    /// Builds a parser config that carries the start/end tokens above and
    /// leaves every other field at its default.
    fn harmony_config() -> JsonParserConfig {
        JsonParserConfig {
            tool_call_start_tokens: vec![START_TOKEN.to_string()],
            tool_call_end_tokens: vec![END_TOKEN.to_string()],
            ..Default::default()
        }
    }

    #[test]
    fn test_detect_tool_call_start_harmony_chunk_with_tool_call_start_token() {
        // The full start token is present at the beginning of the chunk.
        let chunk = r#"<|start|>assistant<|channel|>commentary to=functions.get_current_weather <|constrain|>json"#;
        assert!(detect_tool_call_start_harmony(chunk, &harmony_config()));
    }

    #[test]
    fn test_detect_tool_call_start_harmony_chunk_without_tool_call_start_token() {
        // Only the tail of the start token is present, which must not match.
        let chunk = r#"<|channel|>commentary to=functions.get_current_weather"#;
        assert!(!detect_tool_call_start_harmony(chunk, &harmony_config()));
    }
}

lib/parsers/src/tool_calling/harmony/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@
44
pub mod harmony_parser;
55

66
pub use super::{config, response};
7-
pub use harmony_parser::parse_tool_calls_harmony;
7+
pub use harmony_parser::{detect_tool_call_start_harmony, parse_tool_calls_harmony};

lib/parsers/src/tool_calling/json/base_json_parser.rs

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,3 +306,133 @@ pub fn try_tool_call_parse_basic_json(
306306

307307
Ok((vec![], Some(trimmed.to_string())))
308308
}
309+
310+
pub fn detect_tool_call_start_basic_json(chunk: &str, config: &JsonParserConfig) -> bool {
311+
let trimmed = chunk.trim();
312+
if trimmed.is_empty() {
313+
return false;
314+
}
315+
config
316+
.tool_call_start_tokens
317+
.iter()
318+
.any(|token| trimmed.contains(token))
319+
|| trimmed.contains('{')
320+
|| trimmed.contains('[')
321+
}
322+
323+
#[cfg(test)]
mod detect_parser_tests {
    use super::*;

    /// Builds a parser config with a single start token and a single end
    /// token, leaving every other field at its default.
    fn config_with_tokens(start: &str, end: &str) -> JsonParserConfig {
        JsonParserConfig {
            tool_call_start_tokens: vec![start.to_string()],
            tool_call_end_tokens: vec![end.to_string()],
            ..Default::default()
        }
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_with_tool_call_start_token_hermes() {
        let chunk =
            r#"<tool_call>{"name": "search", "parameters": { "query": "rust" } }</tool_call>"#;
        let cfg = config_with_tokens("<tool_call>", "</tool_call>");
        assert!(detect_tool_call_start_basic_json(chunk, &cfg));
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_without_tool_call_start_token() {
        // No start token, but the chunk opens a JSON object.
        let chunk = r#"{"name": "search", "parameters": { "query": "rust" } }"#;
        let cfg = config_with_tokens("<tool_call>", "</tool_call>");
        assert!(detect_tool_call_start_basic_json(chunk, &cfg));
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_without_tool_call_start_token_with_normal_text() {
        // Leading prose followed by the beginning of a JSON object.
        let chunk = r#"Here it is {"name": "#;
        let cfg = config_with_tokens("<tool_call>", "</tool_call>");
        assert!(detect_tool_call_start_basic_json(chunk, &cfg));
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_with_square_brackets() {
        // This kind of false positive is expected when the function is called
        // on streamed chunks (stream=True).
        let chunk = r#"Here it is [{"name": "search","#;
        let cfg = config_with_tokens("<tool_call>", "</tool_call>");
        assert!(detect_tool_call_start_basic_json(chunk, &cfg));
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_false_positive() {
        // This kind of false positive is expected when the function is called
        // on streamed chunks (stream=True).
        let chunk = r#"Here it is { Whats up"#;
        let cfg = config_with_tokens("<tool_call>", "</tool_call>");
        assert!(detect_tool_call_start_basic_json(chunk, &cfg));
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_with_tool_call_start_token_nemotron_deci() {
        let chunk =
            r#"<TOOLCALL>[{"name": "search", "parameters": { "query": "rust" } }]</TOOLCALL>"#;
        let cfg = config_with_tokens("<TOOLCALL>", "</TOOLCALL>");
        assert!(detect_tool_call_start_basic_json(chunk, &cfg));
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_with_lllama3_json_token() {
        let chunk = r#"<|python_tag|>{ "name": }"#;
        let cfg = config_with_tokens("<|python_tag|>", "");
        assert!(detect_tool_call_start_basic_json(chunk, &cfg));
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_mistral_token() {
        let chunk = r#"Hello Yo ! [TOOL_CALLS]{"name": "search", "#;
        let cfg = config_with_tokens("[TOOL_CALLS]", "");
        assert!(detect_tool_call_start_basic_json(chunk, &cfg));
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_phi4_token() {
        let chunk = r#"functools{"name": "search", "#;
        let cfg = config_with_tokens("functools", "");
        assert!(detect_tool_call_start_basic_json(chunk, &cfg));
    }
}

lib/parsers/src/tool_calling/json/deepseek_parser.rs

Lines changed: 50 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,7 @@ pub fn parse_tool_calls_deepseek_v3_1(
4343
}
4444

4545
// If no tool call start token is present, there are no tool calls: return an empty tool-call list and the original trimmed string
46-
if let Some(start_token) = tool_call_start_tokens.first() {
47-
if !trimmed.contains(start_token) {
48-
return Ok((vec![], Some(trimmed.to_string())));
49-
}
50-
} else {
51-
// Invalid start token
46+
if !detect_tool_call_start_deepseek_v3_1(trimmed, config) {
5247
return Ok((vec![], Some(trimmed.to_string())));
5348
}
5449

@@ -106,6 +101,15 @@ pub fn parse_tool_calls_deepseek_v3_1(
106101
Ok((tool_calls, Some(normal_text)))
107102
}
108103

104+
pub fn detect_tool_call_start_deepseek_v3_1(chunk: &str, config: &JsonParserConfig) -> bool {
105+
let trimmed = chunk.trim();
106+
!trimmed.is_empty()
107+
&& config
108+
.tool_call_start_tokens
109+
.iter()
110+
.any(|token| trimmed.contains(token))
111+
}
112+
109113
#[cfg(test)]
110114
mod tests {
111115
use super::*;
@@ -220,3 +224,43 @@ mod tests {
220224
assert_eq!(result.len(), 0);
221225
}
222226
}
227+
228+
#[cfg(test)]
mod detect_parser_tests {
    use super::*;

    // Tokens shared by every test in this module.
    const CALLS_BEGIN: &str = "<|tool▁calls▁begin|>";
    const CALLS_END: &str = "<|tool▁calls▁end|>";

    /// Builds a parser config that carries the start/end tokens above and
    /// leaves every other field at its default.
    fn deepseek_config() -> JsonParserConfig {
        JsonParserConfig {
            tool_call_start_tokens: vec![CALLS_BEGIN.to_string()],
            tool_call_end_tokens: vec![CALLS_END.to_string()],
            ..Default::default()
        }
    }

    #[test]
    fn test_detect_tool_call_start_deepseek_v3_1_chunk_with_tool_call_start_token() {
        // The start token leads the chunk.
        let chunk = r#"<|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather宽带}"#;
        assert!(detect_tool_call_start_deepseek_v3_1(chunk, &deepseek_config()));
    }

    #[test]
    fn test_detect_tool_call_start_deepseek_v3_1_chunk_without_tool_call_start_token() {
        // Only the per-call begin token is present, not the configured start token.
        let chunk = r#"<|tool▁call▁begin|>get_current_weather宽带}"#;
        assert!(!detect_tool_call_start_deepseek_v3_1(chunk, &deepseek_config()));
    }

    #[test]
    fn test_detect_tool_call_start_deepseek_v3_1_chunk_with_tool_call_start_token_in_middle() {
        // The start token follows ordinary text and must still be found.
        let chunk = r#"The following tool calls retrieve weather information: <|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather宽带}"#;
        assert!(detect_tool_call_start_deepseek_v3_1(chunk, &deepseek_config()));
    }
}

lib/parsers/src/tool_calling/json/mod.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ pub mod base_json_parser;
55
pub mod deepseek_parser;
66

77
pub use super::{config, response};
8-
pub use base_json_parser::try_tool_call_parse_basic_json;
9-
pub use deepseek_parser::parse_tool_calls_deepseek_v3_1;
8+
pub use base_json_parser::{detect_tool_call_start_basic_json, try_tool_call_parse_basic_json};
9+
pub use deepseek_parser::{detect_tool_call_start_deepseek_v3_1, parse_tool_calls_deepseek_v3_1};
1010

1111
pub use super::config::JsonParserConfig;
1212
pub use super::response::ToolCallResponse;
@@ -34,3 +34,10 @@ pub fn try_tool_call_parse_json(
3434
JsonParserType::DeepseekV31 => parse_tool_calls_deepseek_v3_1(message, config),
3535
}
3636
}
37+
38+
pub fn detect_tool_call_start_json(chunk: &str, config: &JsonParserConfig) -> bool {
39+
match config.parser_type {
40+
JsonParserType::Basic => detect_tool_call_start_basic_json(chunk, config),
41+
JsonParserType::DeepseekV31 => detect_tool_call_start_deepseek_v3_1(chunk, config),
42+
}
43+
}

0 commit comments

Comments
 (0)