@@ -306,3 +306,141 @@ pub fn try_tool_call_parse_basic_json(
306306
307307 Ok ( ( vec ! [ ] , Some ( trimmed. to_string ( ) ) ) )
308308}
309+
310+ pub fn detect_tool_call_start_basic_json (
311+ chunk : & str ,
312+ config : & JsonParserConfig ,
313+ ) -> anyhow:: Result < bool > {
314+ // Case 1: If there is any of the start tokens in the chunk, return true
315+ if config
316+ . tool_call_start_tokens
317+ . iter ( )
318+ . any ( |token| chunk. contains ( token) )
319+ {
320+ return Ok ( true ) ;
321+ }
322+
323+ // Case 2: If there is any "{" or "[" in the chunk, return true
324+ // This case will lead to false positives for those models which does not emit tool call start tokens
325+ if chunk. contains ( "{" ) || chunk. contains ( "[" ) {
326+ return Ok ( true ) ;
327+ }
328+ Ok ( false )
329+ }
330+
/// Tests for `detect_tool_call_start_basic_json`, covering several start-token
/// conventions, chunks with no start token, and a plain-text negative case.
#[cfg(test)]
mod detect_parser_tests {
    use super::*;

    /// Builds a parser config with exactly one start token and one end token,
    /// leaving every other field at its default.
    fn config_with_tokens(start: &str, end: &str) -> JsonParserConfig {
        JsonParserConfig {
            tool_call_start_tokens: vec![start.to_string()],
            tool_call_end_tokens: vec![end.to_string()],
            ..Default::default()
        }
    }

    /// Config using the `<tool_call>` / `</tool_call>` token pair.
    fn hermes_config() -> JsonParserConfig {
        config_with_tokens("<tool_call>", "</tool_call>")
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_with_tool_call_start_token_hermes() {
        let text =
            r#"<tool_call>{"name": "search", "parameters": { "query": "rust" } }</tool_call>"#;
        let result = detect_tool_call_start_basic_json(text, &hermes_config()).unwrap();
        assert!(result);
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_without_tool_call_start_token() {
        let text = r#"{"name": "search", "parameters": { "query": "rust" } }"#;
        let result = detect_tool_call_start_basic_json(text, &hermes_config()).unwrap();
        assert!(result);
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_without_tool_call_start_token_with_normal_text() {
        let text = r#"Here it is {"name": "#;
        let result = detect_tool_call_start_basic_json(text, &hermes_config()).unwrap();
        assert!(result);
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_with_square_brackets() {
        // This kind of false positive is expected when calling this function for stream=True
        let text = r#"Here it is [{"name": "search","#;
        let result = detect_tool_call_start_basic_json(text, &hermes_config()).unwrap();
        assert!(result);
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_false_positive() {
        // This kind of false positive is expected when calling this function for stream=True
        let text = r#"Here it is { Whats up"#;
        let result = detect_tool_call_start_basic_json(text, &hermes_config()).unwrap();
        assert!(result);
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_plain_text_without_json_openers() {
        // Negative path: no start token and no `{` / `[` anywhere in the chunk.
        let text = r#"Here it is, nothing special"#;
        let result = detect_tool_call_start_basic_json(text, &hermes_config()).unwrap();
        assert!(!result);
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_with_tool_call_start_token_nemotron_deci() {
        let text =
            r#"<TOOLCALL>[{"name": "search", "parameters": { "query": "rust" } }]</TOOLCALL>"#;
        let config = config_with_tokens("<TOOLCALL>", "</TOOLCALL>");
        let result = detect_tool_call_start_basic_json(text, &config).unwrap();
        assert!(result);
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_with_lllama3_json_token() {
        let text = r#"<|python_tag|>{ "name": }"#;
        let config = config_with_tokens("<|python_tag|>", "");
        let result = detect_tool_call_start_basic_json(text, &config).unwrap();
        assert!(result);
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_mistral_token() {
        let text = r#"Hello Yo ! [TOOL_CALLS]{"name": "search", "#;
        let config = config_with_tokens("[TOOL_CALLS]", "");
        let result = detect_tool_call_start_basic_json(text, &config).unwrap();
        assert!(result);
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_phi4_token() {
        let text = r#"functools{"name": "search", "#;
        let config = config_with_tokens("functools", "");
        let result = detect_tool_call_start_basic_json(text, &config).unwrap();
        assert!(result);
    }
}
0 commit comments