From 1b2b8912c5f69db85c416d2ce8ccd5839b56e3c8 Mon Sep 17 00:00:00 2001 From: ayushag Date: Sun, 7 Sep 2025 21:41:01 +0000 Subject: [PATCH 1/3] chore: added utility to detect possible tool call start for a chunk Signed-off-by: ayushag --- .../tool_calling/harmony/harmony_parser.rs | 53 ++++++- lib/parsers/src/tool_calling/harmony/mod.rs | 2 +- .../src/tool_calling/json/base_json_parser.rs | 138 ++++++++++++++++++ .../src/tool_calling/json/deepseek_parser.rs | 62 +++++++- lib/parsers/src/tool_calling/json/mod.rs | 11 +- lib/parsers/src/tool_calling/parsers.rs | 97 +++++++++++- lib/parsers/src/tool_calling/pythonic/mod.rs | 2 +- .../tool_calling/pythonic/pythonic_parser.rs | 44 ++++++ 8 files changed, 391 insertions(+), 18 deletions(-) diff --git a/lib/parsers/src/tool_calling/harmony/harmony_parser.rs b/lib/parsers/src/tool_calling/harmony/harmony_parser.rs index 90d4527981..7e5a5e58ee 100644 --- a/lib/parsers/src/tool_calling/harmony/harmony_parser.rs +++ b/lib/parsers/src/tool_calling/harmony/harmony_parser.rs @@ -29,11 +29,7 @@ pub fn parse_tool_calls_harmony( // Check if tool call start tokens are present, if not return everything as normal text // Start Token: "<|start|>assistant<|channel|>commentary" should be present in the text if tool calls are present // End Token: "<|call|>" - if !config - .tool_call_start_tokens - .iter() - .any(|token| trimmed.contains(token)) - { + if !detect_tool_call_start_harmony(text, config).unwrap_or(false) { return Ok((vec![], Some(trimmed))); } @@ -158,6 +154,24 @@ pub fn parse_tool_calls_harmony( Ok((res, Some(normal_text.to_string()))) } +pub fn detect_tool_call_start_harmony( + chunk: &str, + config: &JsonParserConfig, +) -> anyhow::Result { + let trimmed = chunk.trim(); + if trimmed.is_empty() { + return Ok(false); + } + if config + .tool_call_start_tokens + .iter() + .any(|token| trimmed.contains(token)) + { + return Ok(true); + } + Ok(false) +} + #[cfg(test)] mod tests { use super::*; @@ -270,3 +284,32 @@ mod tests { assert_eq!(args["unit"], "celsius"); } } + +#[cfg(test)] +mod detect_parser_tests { + use super::*; + + #[test] + fn test_detect_tool_call_start_harmony_chunk_with_tool_call_start_token() { + let text = r#"<|start|>assistant<|channel|>commentary to=functions.get_current_weather <|constrain|>json"#; + let config = JsonParserConfig { + tool_call_start_tokens: vec!["<|start|>assistant<|channel|>commentary".to_string()], + tool_call_end_tokens: vec!["<|call|>".to_string()], + ..Default::default() + }; + let result = detect_tool_call_start_harmony(text, &config).unwrap(); + assert!(result); + } + + #[test] + fn test_detect_tool_call_start_harmony_chunk_without_tool_call_start_token() { + let text = r#"<|channel|>commentary to=functions.get_current_weather"#; + let config = JsonParserConfig { + tool_call_start_tokens: vec!["<|start|>assistant<|channel|>commentary".to_string()], + tool_call_end_tokens: vec!["<|call|>".to_string()], + ..Default::default() + }; + let result = detect_tool_call_start_harmony(text, &config).unwrap(); + assert!(!result); + } +} diff --git a/lib/parsers/src/tool_calling/harmony/mod.rs b/lib/parsers/src/tool_calling/harmony/mod.rs index ffc9bb6aa6..5c3d6bbc45 100644 --- a/lib/parsers/src/tool_calling/harmony/mod.rs +++ b/lib/parsers/src/tool_calling/harmony/mod.rs @@ -4,4 +4,4 @@ pub mod harmony_parser; pub use super::{config, response}; -pub use harmony_parser::parse_tool_calls_harmony; +pub use harmony_parser::{detect_tool_call_start_harmony, parse_tool_calls_harmony}; diff --git a/lib/parsers/src/tool_calling/json/base_json_parser.rs b/lib/parsers/src/tool_calling/json/base_json_parser.rs index fcce3786f0..b6d7a3fb2c 100644 --- a/lib/parsers/src/tool_calling/json/base_json_parser.rs +++ b/lib/parsers/src/tool_calling/json/base_json_parser.rs @@ -306,3 +306,141 @@ pub fn try_tool_call_parse_basic_json( Ok((vec![], Some(trimmed.to_string()))) } + +pub fn detect_tool_call_start_basic_json( + chunk: &str, + config: &JsonParserConfig, +) -> anyhow::Result { + // Case 1: If there is any of the start tokens in the chunk, return true + if config + .tool_call_start_tokens + .iter() + .any(|token| chunk.contains(token)) + { + return Ok(true); + } + + // Case 2: If there is any "{" or "[" in the chunk, return true + // This case will lead to false positives for those models which does not emit tool call start tokens + if chunk.contains("{") || chunk.contains("[") { + return Ok(true); + } + Ok(false) +} + +#[cfg(test)] +mod detect_parser_tests { + use super::*; + + #[test] + fn detect_tool_call_start_basic_json_chunk_with_tool_call_start_token_hermes() { + let text = + r#"{"name": "search", "parameters": { "query": "rust" } }"#; + let config = JsonParserConfig { + tool_call_start_tokens: vec!["".to_string()], + tool_call_end_tokens: vec!["".to_string()], + ..Default::default() + }; + let result = detect_tool_call_start_basic_json(text, &config).unwrap(); + assert!(result); + } + + #[test] + fn detect_tool_call_start_basic_json_chunk_without_tool_call_start_token() { + let text = r#"{"name": "search", "parameters": { "query": "rust" } }"#; + let config = JsonParserConfig { + tool_call_start_tokens: vec!["".to_string()], + tool_call_end_tokens: vec!["".to_string()], + ..Default::default() + }; + let result = detect_tool_call_start_basic_json(text, &config).unwrap(); + assert!(result); + } + + #[test] + fn detect_tool_call_start_basic_json_chunk_without_tool_call_start_token_with_normal_text() { + let text = r#"Here it is {"name": "#; + let config = JsonParserConfig { + tool_call_start_tokens: vec!["".to_string()], + tool_call_end_tokens: vec!["".to_string()], + ..Default::default() + }; + let result = detect_tool_call_start_basic_json(text, &config).unwrap(); + assert!(result); + } + + #[test] + fn detect_tool_call_start_basic_json_chunk_with_square_brackets() { + // These kind of false positives are expected when calling this function for stream=True + let text = r#"Here it is [{"name": "search","#; + let config = JsonParserConfig { + tool_call_start_tokens: vec!["".to_string()], + tool_call_end_tokens: vec!["".to_string()], + ..Default::default() + }; + let result = detect_tool_call_start_basic_json(text, &config).unwrap(); + assert!(result); + } + + #[test] + fn detect_tool_call_start_basic_json_chunk_false_positive() { + // These kind of false positives are expected when calling this function for stream=True + let text = r#"Here it is { Whats up"#; + let config = JsonParserConfig { + tool_call_start_tokens: vec!["".to_string()], + tool_call_end_tokens: vec!["".to_string()], + ..Default::default() + }; + let result = detect_tool_call_start_basic_json(text, &config).unwrap(); + assert!(result); + } + + #[test] + fn detect_tool_call_start_basic_json_chunk_with_tool_call_start_token_nemotron_deci() { + let text = + r#"[{"name": "search", "parameters": { "query": "rust" } }]"#; + let config = JsonParserConfig { + tool_call_start_tokens: vec!["".to_string()], + tool_call_end_tokens: vec!["".to_string()], + ..Default::default() + }; + let result = detect_tool_call_start_basic_json(text, &config).unwrap(); + assert!(result); + } + + #[test] + fn detect_tool_call_start_basic_json_chunk_with_lllama3_json_token() { + let text = r#"<|python_tag|>{ "name": }"#; + let config = JsonParserConfig { + tool_call_start_tokens: vec!["<|python_tag|>".to_string()], + tool_call_end_tokens: vec!["".to_string()], + ..Default::default() + }; + let result = detect_tool_call_start_basic_json(text, &config).unwrap(); + assert!(result); + } + + #[test] + fn detect_tool_call_start_basic_json_chunk_mistral_token() { + let text = r#"Hello Yo ! [TOOL_CALLS]{"name": "search", "#; + let config = JsonParserConfig { + tool_call_start_tokens: vec!["[TOOL_CALLS]".to_string()], + tool_call_end_tokens: vec!["".to_string()], + ..Default::default() + }; + let result = detect_tool_call_start_basic_json(text, &config).unwrap(); + assert!(result); + } + + #[test] + fn detect_tool_call_start_basic_json_chunk_phi4_token() { + let text = r#"functools{"name": "search", "#; + let config = JsonParserConfig { + tool_call_start_tokens: vec!["functools".to_string()], + tool_call_end_tokens: vec!["".to_string()], + ..Default::default() + }; + let result = detect_tool_call_start_basic_json(text, &config).unwrap(); + assert!(result); + } +} diff --git a/lib/parsers/src/tool_calling/json/deepseek_parser.rs b/lib/parsers/src/tool_calling/json/deepseek_parser.rs index 87e6db7fca..a720fb26fd 100644 --- a/lib/parsers/src/tool_calling/json/deepseek_parser.rs +++ b/lib/parsers/src/tool_calling/json/deepseek_parser.rs @@ -43,12 +43,7 @@ pub fn parse_tool_calls_deepseek_v3_1( } // If tool call start token is not present then, no tool calls are there, return empty tool calls and the original trimmed string - if let Some(start_token) = tool_call_start_tokens.first() { - if !trimmed.contains(start_token) { - return Ok((vec![], Some(trimmed.to_string()))); - } - } else { - // Invalid start token + if !detect_tool_call_start_deepseek_v3_1(trimmed, config).unwrap_or(false) { return Ok((vec![], Some(trimmed.to_string()))); } @@ -106,6 +101,21 @@ pub fn parse_tool_calls_deepseek_v3_1( Ok((tool_calls, Some(normal_text))) } +pub fn detect_tool_call_start_deepseek_v3_1( + chunk: &str, + config: &JsonParserConfig, +) -> anyhow::Result { + // if chunk contains tool_call_start_tokens then return true + if config + .tool_call_start_tokens + .iter() + .any(|token| chunk.contains(token)) + { + return Ok(true); + } + Ok(false) +} + #[cfg(test)] mod tests { use super::*; @@ -220,3 +230,43 @@ mod tests { assert_eq!(result.len(), 0); } } + +#[cfg(test)] +mod detect_parser_tests { + use super::*; + #[test] + fn test_detect_tool_call_start_deepseek_v3_1_chunk_with_tool_call_start_token() { + let text = r#"<|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather宽带}"#; + let config = JsonParserConfig { + tool_call_start_tokens: vec!["<|tool▁calls▁begin|>".to_string()], + tool_call_end_tokens: vec!["<|tool▁calls▁end|>".to_string()], + ..Default::default() + }; + let result = detect_tool_call_start_deepseek_v3_1(text, &config).unwrap(); + assert!(result); + } + + #[test] + fn test_detect_tool_call_start_deepseek_v3_1_chunk_without_tool_call_start_token() { + let text = r#"<|tool▁call▁begin|>get_current_weather宽带}"#; + let config = JsonParserConfig { + tool_call_start_tokens: vec!["<|tool▁calls▁begin|>".to_string()], + tool_call_end_tokens: vec!["<|tool▁calls▁end|>".to_string()], + ..Default::default() + }; + let result = detect_tool_call_start_deepseek_v3_1(text, &config).unwrap(); + assert!(!result); + } + + #[test] + fn test_detect_tool_call_start_deepseek_v3_1_chunk_with_tool_call_start_token_in_middle() { + let text = r#"The following tool calls retrieve weather information: <|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather宽带}"#; + let config = JsonParserConfig { + tool_call_start_tokens: vec!["<|tool▁calls▁begin|>".to_string()], + tool_call_end_tokens: vec!["<|tool▁calls▁end|>".to_string()], + ..Default::default() + }; + let result = detect_tool_call_start_deepseek_v3_1(text, &config).unwrap(); + assert!(result); + } +} diff --git a/lib/parsers/src/tool_calling/json/mod.rs b/lib/parsers/src/tool_calling/json/mod.rs index f35c3600b9..21475ce36a 100644 --- a/lib/parsers/src/tool_calling/json/mod.rs +++ b/lib/parsers/src/tool_calling/json/mod.rs @@ -5,8 +5,8 @@ pub mod base_json_parser; pub mod deepseek_parser; pub use super::{config, response}; -pub use base_json_parser::try_tool_call_parse_basic_json; -pub use deepseek_parser::parse_tool_calls_deepseek_v3_1; +pub use base_json_parser::{detect_tool_call_start_basic_json, try_tool_call_parse_basic_json}; +pub use deepseek_parser::{detect_tool_call_start_deepseek_v3_1, parse_tool_calls_deepseek_v3_1}; pub use super::config::JsonParserConfig; pub use super::response::ToolCallResponse; @@ -34,3 +34,10 @@ pub fn try_tool_call_parse_json( JsonParserType::DeepseekV31 => parse_tool_calls_deepseek_v3_1(message, config), } } + +pub fn detect_tool_call_start_json(chunk: &str, config: &JsonParserConfig) -> anyhow::Result { + match config.parser_type { + JsonParserType::Basic => detect_tool_call_start_basic_json(chunk, config), + JsonParserType::DeepseekV31 => detect_tool_call_start_deepseek_v3_1(chunk, config), + } +} diff --git a/lib/parsers/src/tool_calling/parsers.rs b/lib/parsers/src/tool_calling/parsers.rs index ab3654cb54..fef379dd51 100644 --- a/lib/parsers/src/tool_calling/parsers.rs +++ b/lib/parsers/src/tool_calling/parsers.rs @@ -2,9 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 use super::config::{ToolCallConfig, ToolCallParserType}; -use super::harmony::parse_tool_calls_harmony; -use super::json::try_tool_call_parse_json; -use super::pythonic::try_tool_call_parse_pythonic; +use super::harmony::{detect_tool_call_start_harmony, parse_tool_calls_harmony}; +use super::json::{detect_tool_call_start_json, try_tool_call_parse_json}; +use super::pythonic::{detect_tool_call_start_pythonic, try_tool_call_parse_pythonic}; use super::response::ToolCallResponse; use std::collections::HashMap; use std::sync::OnceLock; @@ -86,6 +86,33 @@ pub fn detect_and_parse_tool_call( } } +pub fn detect_tool_call_start(chunk: &str, parser_str: Option<&str>) -> anyhow::Result { + let parser_map = get_tool_parser_map(); + let parser_key = match parser_str { + Some(s) if !s.is_empty() => s, + _ => "default", // None or empty string + }; + + match parser_map.get(parser_key) { + Some(config) => match config.format { + ToolCallParserType::Json => detect_tool_call_start_json(chunk, &config.json), + ToolCallParserType::Harmony => detect_tool_call_start_harmony(chunk, &config.json), + ToolCallParserType::Pythonic => detect_tool_call_start_pythonic(chunk), + ToolCallParserType::Typescript => { + anyhow::bail!("Typescript parser not implemented"); + } + ToolCallParserType::Xml => { + anyhow::bail!("Xml parser not implemented"); + } + }, + None => anyhow::bail!( + "Parser '{}' is not implemented. Available parsers: {:?}", + parser_key, + get_available_tool_parsers() + ), + } +} + // Tests // cargo test postprocessor::tool_calling::parsers #[cfg(test)] @@ -1187,3 +1214,67 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it assert_eq!(args["location"], "Paris"); } } + +#[cfg(test)] +// Just e2e tests to test the flow. Detailed tests are covered in the individual parsers +mod detect_parser_tests { + use super::*; + + #[test] + fn test_e2e_detect_tool_call_start_harmony() { + let text = r#"<|start|>assistant<|channel|>commentary to=functions.get_current_weather <|constrain|>json"#; + let result = detect_tool_call_start(text, Some("harmony")).unwrap(); + assert!(result); + } + + #[test] + fn test_e2e_detect_tool_call_start_hermes() { + let text = r#"{"name": "get_current_weather", "parameters": {"location": "Tokyo"}}"#; + let result = detect_tool_call_start(text, Some("hermes")).unwrap(); + assert!(result); + } + + #[test] + fn test_e2e_detect_tool_call_start_pythonic() { + let text = r#"foo(a=1, b=2), bar(x=3)]"#; + let result = detect_tool_call_start(text, Some("pythonic")).unwrap(); + assert!(!result); + } + + #[test] + fn test_e2e_detect_tool_call_start_nemotron_deci() { + let text = r#"[{"name": "get_current_weather", "parameters": {"location": "Tokyo"}}]"#; + let result = detect_tool_call_start(text, Some("nemotron_deci")).unwrap(); + assert!(result); + } + + #[test] + fn test_e2e_detect_tool_call_start_phi4() { + let text = + r#"functools{"name": "get_current_weather", "parameters": {"location": "Tokyo"}}"#; + let result = detect_tool_call_start(text, Some("phi4")).unwrap(); + assert!(result); + } + + #[test] + fn test_e2e_detect_tool_call_start_llama3_json() { + let text = r#"<|python_tag|>{ "name": "get_current_weather", "parameters": {"location": "Tokyo"}}"#; + let result = detect_tool_call_start(text, Some("llama3_json")).unwrap(); + assert!(result); + } + + #[test] + fn test_e2e_detect_tool_call_start_mistral() { + let text = + r#"[TOOL_CALLS]{"name": "get_current_weather", "parameters": {"location": "Tokyo"}}"#; + let result = detect_tool_call_start(text, Some("mistral")).unwrap(); + assert!(result); + } + + #[test] + fn test_e2e_detect_tool_call_start_deepseek_v3_1() { + let text = r#"<|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather{"location": "Tokyo"}<|tool▁call▁end|>"#; + let result = detect_tool_call_start(text, Some("deepseek_v3_1")).unwrap(); + assert!(result); + } +} diff --git a/lib/parsers/src/tool_calling/pythonic/mod.rs b/lib/parsers/src/tool_calling/pythonic/mod.rs index 495e82fb80..b0a28846cb 100644 --- a/lib/parsers/src/tool_calling/pythonic/mod.rs +++ b/lib/parsers/src/tool_calling/pythonic/mod.rs @@ -4,4 +4,4 @@ pub mod pythonic_parser; pub use super::{config, response}; -pub use pythonic_parser::try_tool_call_parse_pythonic; +pub use pythonic_parser::{detect_tool_call_start_pythonic, try_tool_call_parse_pythonic}; diff --git a/lib/parsers/src/tool_calling/pythonic/pythonic_parser.rs b/lib/parsers/src/tool_calling/pythonic/pythonic_parser.rs index ab04ba4766..84272bad18 100644 --- a/lib/parsers/src/tool_calling/pythonic/pythonic_parser.rs +++ b/lib/parsers/src/tool_calling/pythonic/pythonic_parser.rs @@ -187,6 +187,16 @@ pub fn try_tool_call_parse_pythonic( Ok((tool_response?, Some(normal_text))) } +pub fn detect_tool_call_start_pythonic(chunk: &str) -> anyhow::Result { + // Format Structure: [tool1(arg1=val1, arg2=val2), tool2(arg1=val3)] + + // Check if the chunk contains atleast "[" + if !chunk.contains("[") { + return Ok(false); + } + Ok(true) +} + #[cfg(test)] mod tests { use super::*; @@ -353,3 +363,37 @@ mod tests { assert_eq!(args["x"], json!({"x": 3, "y": {"e": "f"}})); } } + +#[cfg(test)] +mod detect_parser_tests { + use super::*; + + #[test] + fn test_detect_tool_call_start_pythonic_chunk_with_tool_call_start_token() { + let text = r#"[foo(a=1, b=2), bar(x=3)]"#; + let result = detect_tool_call_start_pythonic(text).unwrap(); + assert!(result); + } + + #[test] + fn test_detect_tool_call_start_pythonic_chunk_without_tool_call_start_token() { + let text = r#"foo(a=1, b=2)"#; + let result = detect_tool_call_start_pythonic(text).unwrap(); + assert!(!result); + } + + #[test] + fn test_detect_tool_call_start_pythonic_chunk_with_tool_call_start_token_in_middle() { + let text = r#"information: [foo(a=1, b=2), bar(x=3)]"#; + let result = detect_tool_call_start_pythonic(text).unwrap(); + assert!(result); + } + + #[test] + fn test_detect_tool_call_start_pythonic_false_positive() { + // Since we detect just "[" as tool call start token, this will be a false positive + let text = r#"Hey [ There is one tool call here . foo(a=1, b=2)"#; + let result = detect_tool_call_start_pythonic(text).unwrap(); + assert!(result); + } +} From 8e53371b349658e91b8217de15eab34cee13624b Mon Sep 17 00:00:00 2001 From: ayushag Date: Sun, 7 Sep 2025 21:57:11 +0000 Subject: [PATCH 2/3] fix: add trim logic for chunks Signed-off-by: ayushag --- lib/parsers/src/tool_calling/json/base_json_parser.rs | 9 +++++++-- lib/parsers/src/tool_calling/json/deepseek_parser.rs | 7 ++++++- lib/parsers/src/tool_calling/pythonic/pythonic_parser.rs | 7 ++++++- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/lib/parsers/src/tool_calling/json/base_json_parser.rs b/lib/parsers/src/tool_calling/json/base_json_parser.rs index b6d7a3fb2c..80855a592e 100644 --- a/lib/parsers/src/tool_calling/json/base_json_parser.rs +++ b/lib/parsers/src/tool_calling/json/base_json_parser.rs @@ -311,18 +311,23 @@ pub fn detect_tool_call_start_basic_json( chunk: &str, config: &JsonParserConfig, ) -> anyhow::Result { + let trimmed = chunk.trim(); + if trimmed.is_empty() { + return Ok(false); + } + // Case 1: If there is any of the start tokens in the chunk, return true if config .tool_call_start_tokens .iter() - .any(|token| chunk.contains(token)) + .any(|token| trimmed.contains(token)) { return Ok(true); } // Case 2: If there is any "{" or "[" in the chunk, return true // This case will lead to false positives for those models which does not emit tool call start tokens - if chunk.contains("{") || chunk.contains("[") { + if trimmed.contains("{") || trimmed.contains("[") { return Ok(true); } Ok(false) diff --git a/lib/parsers/src/tool_calling/json/deepseek_parser.rs b/lib/parsers/src/tool_calling/json/deepseek_parser.rs index a720fb26fd..0e50195732 100644 --- a/lib/parsers/src/tool_calling/json/deepseek_parser.rs +++ b/lib/parsers/src/tool_calling/json/deepseek_parser.rs @@ -105,11 +105,16 @@ pub fn detect_tool_call_start_deepseek_v3_1( chunk: &str, config: &JsonParserConfig, ) -> anyhow::Result { + let trimmed = chunk.trim(); + if trimmed.is_empty() { + return Ok(false); + } + // if chunk contains tool_call_start_tokens then return true if config .tool_call_start_tokens .iter() - .any(|token| chunk.contains(token)) + .any(|token| trimmed.contains(token)) { return Ok(true); } diff --git a/lib/parsers/src/tool_calling/pythonic/pythonic_parser.rs b/lib/parsers/src/tool_calling/pythonic/pythonic_parser.rs index 84272bad18..4232dd32b4 100644 --- a/lib/parsers/src/tool_calling/pythonic/pythonic_parser.rs +++ b/lib/parsers/src/tool_calling/pythonic/pythonic_parser.rs @@ -188,10 +188,15 @@ pub fn try_tool_call_parse_pythonic( } pub fn detect_tool_call_start_pythonic(chunk: &str) -> anyhow::Result { + let trimmed = chunk.trim(); + if trimmed.is_empty() { + return Ok(false); + } + // Format Structure: [tool1(arg1=val1, arg2=val2), tool2(arg1=val3)] // Check if the chunk contains atleast "[" - if !chunk.contains("[") { + if !trimmed.contains("[") { return Ok(false); } Ok(true) From 8cd5b8d0fd555d9ae93ce9c836845cff6080d159 Mon Sep 17 00:00:00 2001 From: ayushag Date: Mon, 8 Sep 2025 19:19:14 +0000 Subject: [PATCH 3/3] chore: optimizations Signed-off-by: ayushag --- .../tool_calling/harmony/harmony_parser.rs | 19 +++------ .../src/tool_calling/json/base_json_parser.rs | 41 +++++++------------ .../src/tool_calling/json/deepseek_parser.rs | 31 +++++--------- lib/parsers/src/tool_calling/json/mod.rs | 2 +- lib/parsers/src/tool_calling/parsers.rs | 6 +-- .../tool_calling/pythonic/pythonic_parser.rs | 23 ++++------- 6 files changed, 43 insertions(+), 79 deletions(-) diff --git a/lib/parsers/src/tool_calling/harmony/harmony_parser.rs b/lib/parsers/src/tool_calling/harmony/harmony_parser.rs index 7e5a5e58ee..f779613fb7 100644 --- a/lib/parsers/src/tool_calling/harmony/harmony_parser.rs +++ b/lib/parsers/src/tool_calling/harmony/harmony_parser.rs @@ -29,7 +29,7 @@ pub fn parse_tool_calls_harmony( // Check if tool call start tokens are present, if not return everything as normal text // Start Token: "<|start|>assistant<|channel|>commentary" should be present in the text if tool calls are present // End Token: "<|call|>" - if !detect_tool_call_start_harmony(text, config).unwrap_or(false) { + if !detect_tool_call_start_harmony(text, config) { return Ok((vec![], Some(trimmed))); } @@ -154,22 +154,15 @@ pub fn parse_tool_calls_harmony( Ok((res, Some(normal_text.to_string()))) } -pub fn detect_tool_call_start_harmony( - chunk: &str, - config: &JsonParserConfig, -) -> anyhow::Result { +pub fn detect_tool_call_start_harmony(chunk: &str, config: &JsonParserConfig) -> bool { let trimmed = chunk.trim(); if trimmed.is_empty() { - return Ok(false); + return false; } - if config + config .tool_call_start_tokens .iter() .any(|token| trimmed.contains(token)) - { - return Ok(true); - } - Ok(false) } #[cfg(test)] @@ -297,7 +290,7 @@ mod detect_parser_tests { tool_call_end_tokens: vec!["<|call|>".to_string()], ..Default::default() }; - let result = detect_tool_call_start_harmony(text, &config).unwrap(); + let result = detect_tool_call_start_harmony(text, &config); assert!(result); } @@ -309,7 +302,7 @@ mod detect_parser_tests { tool_call_end_tokens: vec!["<|call|>".to_string()], ..Default::default() }; - let result = detect_tool_call_start_harmony(text, &config).unwrap(); + let result = detect_tool_call_start_harmony(text, &config); assert!(!result); } } diff --git a/lib/parsers/src/tool_calling/json/base_json_parser.rs b/lib/parsers/src/tool_calling/json/base_json_parser.rs index 80855a592e..f6aacf05d4 100644 --- a/lib/parsers/src/tool_calling/json/base_json_parser.rs +++ b/lib/parsers/src/tool_calling/json/base_json_parser.rs @@ -307,30 +307,17 @@ pub fn try_tool_call_parse_basic_json( Ok((vec![], Some(trimmed.to_string()))) } -pub fn detect_tool_call_start_basic_json( - chunk: &str, - config: &JsonParserConfig, -) -> anyhow::Result { +pub fn detect_tool_call_start_basic_json(chunk: &str, config: &JsonParserConfig) -> bool { let trimmed = chunk.trim(); if trimmed.is_empty() { - return Ok(false); + return false; } - - // Case 1: If there is any of the start tokens in the chunk, return true - if config + config .tool_call_start_tokens .iter() .any(|token| trimmed.contains(token)) - { - return Ok(true); - } - - // Case 2: If there is any "{" or "[" in the chunk, return true - // This case will lead to false positives for those models which does not emit tool call start tokens - if trimmed.contains("{") || trimmed.contains("[") { - return Ok(true); - } - Ok(false) + || trimmed.contains('{') + || trimmed.contains('[') } #[cfg(test)] @@ -346,7 +333,7 @@ mod detect_parser_tests { tool_call_end_tokens: vec!["".to_string()], ..Default::default() }; - let result = detect_tool_call_start_basic_json(text, &config).unwrap(); + let result = detect_tool_call_start_basic_json(text, &config); assert!(result); } @@ -358,7 +345,7 @@ mod detect_parser_tests { tool_call_end_tokens: vec!["".to_string()], ..Default::default() }; - let result = detect_tool_call_start_basic_json(text, &config).unwrap(); + let result = detect_tool_call_start_basic_json(text, &config); assert!(result); } @@ -370,7 +357,7 @@ mod detect_parser_tests { tool_call_end_tokens: vec!["".to_string()], ..Default::default() }; - let result = detect_tool_call_start_basic_json(text, &config).unwrap(); + let result = detect_tool_call_start_basic_json(text, &config); assert!(result); } @@ -383,7 +370,7 @@ mod detect_parser_tests { tool_call_end_tokens: vec!["".to_string()], ..Default::default() }; - let result = detect_tool_call_start_basic_json(text, &config).unwrap(); + let result = detect_tool_call_start_basic_json(text, &config); assert!(result); } @@ -396,7 +383,7 @@ mod detect_parser_tests { tool_call_end_tokens: vec!["".to_string()], ..Default::default() }; - let result = detect_tool_call_start_basic_json(text, &config).unwrap(); + let result = detect_tool_call_start_basic_json(text, &config); assert!(result); } @@ -409,7 +396,7 @@ mod detect_parser_tests { tool_call_end_tokens: vec!["".to_string()], ..Default::default() }; - let result = detect_tool_call_start_basic_json(text, &config).unwrap(); + let result = detect_tool_call_start_basic_json(text, &config); assert!(result); } @@ -421,7 +408,7 @@ mod detect_parser_tests { tool_call_end_tokens: vec!["".to_string()], ..Default::default() }; - let result = detect_tool_call_start_basic_json(text, &config).unwrap(); + let result = detect_tool_call_start_basic_json(text, &config); assert!(result); } @@ -433,7 +420,7 @@ mod detect_parser_tests { tool_call_end_tokens: vec!["".to_string()], ..Default::default() }; - let result = detect_tool_call_start_basic_json(text, &config).unwrap(); + let result = detect_tool_call_start_basic_json(text, &config); assert!(result); } @@ -445,7 +432,7 @@ mod detect_parser_tests { tool_call_end_tokens: vec!["".to_string()], ..Default::default() }; - let result = detect_tool_call_start_basic_json(text, &config).unwrap(); + let result = detect_tool_call_start_basic_json(text, &config); assert!(result); } } diff --git a/lib/parsers/src/tool_calling/json/deepseek_parser.rs b/lib/parsers/src/tool_calling/json/deepseek_parser.rs index 0e50195732..76ef0a661e 100644 --- a/lib/parsers/src/tool_calling/json/deepseek_parser.rs +++ b/lib/parsers/src/tool_calling/json/deepseek_parser.rs @@ -43,7 +43,7 @@ pub fn parse_tool_calls_deepseek_v3_1( } // If tool call start token is not present then, no tool calls are there, return empty tool calls and the original trimmed string - if !detect_tool_call_start_deepseek_v3_1(trimmed, config).unwrap_or(false) { + if !detect_tool_call_start_deepseek_v3_1(trimmed, config) { return Ok((vec![], Some(trimmed.to_string()))); } @@ -101,24 +101,13 @@ pub fn parse_tool_calls_deepseek_v3_1( Ok((tool_calls, Some(normal_text))) } -pub fn detect_tool_call_start_deepseek_v3_1( - chunk: &str, - config: &JsonParserConfig, -) -> anyhow::Result { +pub fn detect_tool_call_start_deepseek_v3_1(chunk: &str, config: &JsonParserConfig) -> bool { let trimmed = chunk.trim(); - if trimmed.is_empty() { - return Ok(false); - } - - // if chunk contains tool_call_start_tokens then return true - if config - .tool_call_start_tokens - .iter() - .any(|token| trimmed.contains(token)) - { - return Ok(true); - } - Ok(false) + !trimmed.is_empty() + && config + .tool_call_start_tokens + .iter() + .any(|token| trimmed.contains(token)) } #[cfg(test)] @@ -247,7 +236,7 @@ mod detect_parser_tests { tool_call_end_tokens: vec!["<|tool▁calls▁end|>".to_string()], ..Default::default() }; - let result = detect_tool_call_start_deepseek_v3_1(text, &config).unwrap(); + let result = detect_tool_call_start_deepseek_v3_1(text, &config); assert!(result); } @@ -259,7 +248,7 @@ mod detect_parser_tests { tool_call_end_tokens: vec!["<|tool▁calls▁end|>".to_string()], ..Default::default() }; - let result = detect_tool_call_start_deepseek_v3_1(text, &config).unwrap(); + let result = detect_tool_call_start_deepseek_v3_1(text, &config); assert!(!result); } @@ -271,7 +260,7 @@ mod detect_parser_tests { tool_call_end_tokens: vec!["<|tool▁calls▁end|>".to_string()], ..Default::default() }; - let result = detect_tool_call_start_deepseek_v3_1(text, &config).unwrap(); + let result = detect_tool_call_start_deepseek_v3_1(text, &config); assert!(result); } } diff --git a/lib/parsers/src/tool_calling/json/mod.rs b/lib/parsers/src/tool_calling/json/mod.rs index 21475ce36a..184c180b23 100644 --- a/lib/parsers/src/tool_calling/json/mod.rs +++ b/lib/parsers/src/tool_calling/json/mod.rs @@ -35,7 +35,7 @@ pub fn try_tool_call_parse_json( } } -pub fn detect_tool_call_start_json(chunk: &str, config: &JsonParserConfig) -> anyhow::Result { +pub fn detect_tool_call_start_json(chunk: &str, config: &JsonParserConfig) -> bool { match config.parser_type { JsonParserType::Basic => detect_tool_call_start_basic_json(chunk, config), JsonParserType::DeepseekV31 => detect_tool_call_start_deepseek_v3_1(chunk, config), diff --git a/lib/parsers/src/tool_calling/parsers.rs b/lib/parsers/src/tool_calling/parsers.rs index fef379dd51..7453b8e3ab 100644 --- a/lib/parsers/src/tool_calling/parsers.rs +++ b/lib/parsers/src/tool_calling/parsers.rs @@ -95,9 +95,9 @@ pub fn detect_tool_call_start(chunk: &str, parser_str: Option<&str>) -> anyhow:: match parser_map.get(parser_key) { Some(config) => match config.format { - ToolCallParserType::Json => detect_tool_call_start_json(chunk, &config.json), - ToolCallParserType::Harmony => detect_tool_call_start_harmony(chunk, &config.json), - ToolCallParserType::Pythonic => detect_tool_call_start_pythonic(chunk), + ToolCallParserType::Json => Ok(detect_tool_call_start_json(chunk, &config.json)), + ToolCallParserType::Harmony => Ok(detect_tool_call_start_harmony(chunk, &config.json)), + ToolCallParserType::Pythonic => Ok(detect_tool_call_start_pythonic(chunk)), ToolCallParserType::Typescript => { anyhow::bail!("Typescript parser not implemented"); } diff --git a/lib/parsers/src/tool_calling/pythonic/pythonic_parser.rs b/lib/parsers/src/tool_calling/pythonic/pythonic_parser.rs index 4232dd32b4..4dbdc8f331 100644 --- a/lib/parsers/src/tool_calling/pythonic/pythonic_parser.rs +++ b/lib/parsers/src/tool_calling/pythonic/pythonic_parser.rs @@ -187,19 +187,14 @@ pub fn try_tool_call_parse_pythonic( Ok((tool_response?, Some(normal_text))) } -pub fn detect_tool_call_start_pythonic(chunk: &str) -> anyhow::Result { +pub fn detect_tool_call_start_pythonic(chunk: &str) -> bool { let trimmed = chunk.trim(); + // Early return for empty input if trimmed.is_empty() { - return Ok(false); + return false; } - - // Format Structure: [tool1(arg1=val1, arg2=val2), tool2(arg1=val3)] - - // Check if the chunk contains atleast "[" - if !trimmed.contains("[") { - return Ok(false); - } - Ok(true) + // Heuristic: Pythonic tool calls always start with a '[' somewhere in the chunk + trimmed.contains('[') } #[cfg(test)] @@ -376,21 +371,21 @@ mod detect_parser_tests { #[test] fn test_detect_tool_call_start_pythonic_chunk_with_tool_call_start_token() { let text = r#"[foo(a=1, b=2), bar(x=3)]"#; - let result = detect_tool_call_start_pythonic(text).unwrap(); + let result = detect_tool_call_start_pythonic(text); assert!(result); } #[test] fn test_detect_tool_call_start_pythonic_chunk_without_tool_call_start_token() { let text = r#"foo(a=1, b=2)"#; - let result = detect_tool_call_start_pythonic(text).unwrap(); + let result = detect_tool_call_start_pythonic(text); assert!(!result); } #[test] fn test_detect_tool_call_start_pythonic_chunk_with_tool_call_start_token_in_middle() { let text = r#"information: [foo(a=1, b=2), bar(x=3)]"#; - let result = detect_tool_call_start_pythonic(text).unwrap(); + let result = detect_tool_call_start_pythonic(text); assert!(result); } @@ -398,7 +393,7 @@ mod detect_parser_tests { fn test_detect_tool_call_start_pythonic_false_positive() { // Since we detect just "[" as tool call start token, this will be a false positive let text = r#"Hey [ There is one tool call here . foo(a=1, b=2)"#; - let result = detect_tool_call_start_pythonic(text).unwrap(); + let result = detect_tool_call_start_pythonic(text); assert!(result); } }