Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 48 additions & 5 deletions lib/parsers/src/tool_calling/harmony/harmony_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,7 @@ pub fn parse_tool_calls_harmony(
// Check whether any tool call start token is present; if not, return everything as normal text
// Start Token: "<|start|>assistant<|channel|>commentary" should be present in the text if tool calls are present
// End Token: "<|call|>"
if !config
.tool_call_start_tokens
.iter()
.any(|token| trimmed.contains(token))
{
if !detect_tool_call_start_harmony(text, config).unwrap_or(false) {
return Ok((vec![], Some(trimmed)));
}

Expand Down Expand Up @@ -158,6 +154,24 @@ pub fn parse_tool_calls_harmony(
Ok((res, Some(normal_text.to_string())))
}

/// Reports whether `chunk` contains a configured tool-call start token.
///
/// The chunk is trimmed of surrounding whitespace first. A chunk that is empty
/// after trimming yields `Ok(false)`. Otherwise the result is `Ok(true)` when at
/// least one entry of `config.tool_call_start_tokens` occurs as a substring of
/// the trimmed chunk, and `Ok(false)` when none does.
///
/// # Errors
///
/// This implementation never produces an `Err`; the `Result` wrapper is part of
/// the signature only.
pub fn detect_tool_call_start_harmony(
    chunk: &str,
    config: &JsonParserConfig,
) -> anyhow::Result<bool> {
    let candidate = chunk.trim();

    // An all-whitespace chunk cannot start a tool call, so the token scan is
    // skipped entirely in that case.
    let has_start_token = !candidate.is_empty()
        && config
            .tool_call_start_tokens
            .iter()
            .any(|start| candidate.contains(start.as_str()));

    Ok(has_start_token)
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -270,3 +284,32 @@ mod tests {
assert_eq!(args["unit"], "celsius");
}
}

#[cfg(test)]
mod detect_parser_tests {
    use super::*;

    /// Builds the parser configuration used by every test in this module:
    /// one start token and one end token, everything else defaulted.
    fn config_under_test() -> JsonParserConfig {
        JsonParserConfig {
            tool_call_start_tokens: vec!["<|start|>assistant<|channel|>commentary".to_string()],
            tool_call_end_tokens: vec!["<|call|>".to_string()],
            ..Default::default()
        }
    }

    /// A chunk that contains the full start token is reported as a tool-call start.
    #[test]
    fn test_detect_tool_call_start_harmony_chunk_with_tool_call_start_token() {
        let chunk = r#"<|start|>assistant<|channel|>commentary to=functions.get_current_weather <|constrain|>json"#;
        let detected = detect_tool_call_start_harmony(chunk, &config_under_test()).unwrap();
        assert!(detected);
    }

    /// A chunk that holds only part of the start token is not reported.
    #[test]
    fn test_detect_tool_call_start_harmony_chunk_without_tool_call_start_token() {
        let chunk = r#"<|channel|>commentary to=functions.get_current_weather"#;
        let detected = detect_tool_call_start_harmony(chunk, &config_under_test()).unwrap();
        assert!(!detected);
    }
}
2 changes: 1 addition & 1 deletion lib/parsers/src/tool_calling/harmony/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
pub mod harmony_parser;

pub use super::{config, response};
pub use harmony_parser::parse_tool_calls_harmony;
pub use harmony_parser::{detect_tool_call_start_harmony, parse_tool_calls_harmony};
138 changes: 138 additions & 0 deletions lib/parsers/src/tool_calling/json/base_json_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -306,3 +306,141 @@ pub fn try_tool_call_parse_basic_json(

Ok((vec![], Some(trimmed.to_string())))
}

/// Reports whether `chunk` may contain the beginning of a JSON tool call.
///
/// Returns `Ok(true)` when either of the following holds, and `Ok(false)`
/// otherwise:
///
/// 1. Any entry of `config.tool_call_start_tokens` occurs in `chunk`.
/// 2. `chunk` contains a `{` or a `[` anywhere.
///
/// The second rule is deliberately loose: it produces false positives for
/// ordinary text that merely contains a brace or bracket, which is the price of
/// supporting models that do not emit tool-call start tokens.
///
/// # Errors
///
/// This implementation never produces an `Err`.
pub fn detect_tool_call_start_basic_json(
    chunk: &str,
    config: &JsonParserConfig,
) -> anyhow::Result<bool> {
    // Rule 1: an explicit start token is present.
    let has_start_token = config
        .tool_call_start_tokens
        .iter()
        .any(|start| chunk.contains(start.as_str()));

    // Rule 2: a bare JSON opener is present (may be a false positive).
    Ok(has_start_token || chunk.contains(['{', '[']))
}

#[cfg(test)]
mod detect_parser_tests {
    use super::*;

    /// Runs `detect_tool_call_start_basic_json` on `chunk` with a configuration
    /// holding exactly one start token and one end token (all other fields
    /// defaulted) and returns the unwrapped verdict.
    fn detects(chunk: &str, start_token: &str, end_token: &str) -> bool {
        let config = JsonParserConfig {
            tool_call_start_tokens: vec![start_token.to_string()],
            tool_call_end_tokens: vec![end_token.to_string()],
            ..Default::default()
        };
        detect_tool_call_start_basic_json(chunk, &config).unwrap()
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_with_tool_call_start_token_hermes() {
        let chunk =
            r#"<tool_call>{"name": "search", "parameters": { "query": "rust" } }</tool_call>"#;
        assert!(detects(chunk, "<tool_call>", "</tool_call>"));
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_without_tool_call_start_token() {
        let chunk = r#"{"name": "search", "parameters": { "query": "rust" } }"#;
        assert!(detects(chunk, "<tool_call>", "</tool_call>"));
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_without_tool_call_start_token_with_normal_text() {
        let chunk = r#"Here it is {"name": "#;
        assert!(detects(chunk, "<tool_call>", "</tool_call>"));
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_with_square_brackets() {
        // This kind of false positive is expected when the function is called for stream=True.
        let chunk = r#"Here it is [{"name": "search","#;
        assert!(detects(chunk, "<tool_call>", "</tool_call>"));
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_false_positive() {
        // This kind of false positive is expected when the function is called for stream=True.
        let chunk = r#"Here it is { Whats up"#;
        assert!(detects(chunk, "<tool_call>", "</tool_call>"));
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_with_tool_call_start_token_nemotron_deci() {
        let chunk =
            r#"<TOOLCALL>[{"name": "search", "parameters": { "query": "rust" } }]</TOOLCALL>"#;
        assert!(detects(chunk, "<TOOLCALL>", "</TOOLCALL>"));
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_with_lllama3_json_token() {
        let chunk = r#"<|python_tag|>{ "name": }"#;
        assert!(detects(chunk, "<|python_tag|>", ""));
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_mistral_token() {
        let chunk = r#"Hello Yo ! [TOOL_CALLS]{"name": "search", "#;
        assert!(detects(chunk, "[TOOL_CALLS]", ""));
    }

    #[test]
    fn detect_tool_call_start_basic_json_chunk_phi4_token() {
        let chunk = r#"functools{"name": "search", "#;
        assert!(detects(chunk, "functools", ""));
    }
}
62 changes: 56 additions & 6 deletions lib/parsers/src/tool_calling/json/deepseek_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,7 @@ pub fn parse_tool_calls_deepseek_v3_1(
}

// If the tool call start token is not present, there are no tool calls: return an empty tool-call list and the original trimmed string
if let Some(start_token) = tool_call_start_tokens.first() {
if !trimmed.contains(start_token) {
return Ok((vec![], Some(trimmed.to_string())));
}
} else {
// Invalid start token
if !detect_tool_call_start_deepseek_v3_1(trimmed, config).unwrap_or(false) {
return Ok((vec![], Some(trimmed.to_string())));
}

Expand Down Expand Up @@ -106,6 +101,21 @@ pub fn parse_tool_calls_deepseek_v3_1(
Ok((tool_calls, Some(normal_text)))
}

/// Reports whether `chunk` contains a configured tool-call start token.
///
/// Returns `Ok(true)` as soon as one entry of `config.tool_call_start_tokens`
/// is found as a substring of `chunk` (at any position), and `Ok(false)` when
/// none of them occurs. The chunk is searched as-is, without trimming.
///
/// # Errors
///
/// This implementation never produces an `Err`.
pub fn detect_tool_call_start_deepseek_v3_1(
    chunk: &str,
    config: &JsonParserConfig,
) -> anyhow::Result<bool> {
    for start in &config.tool_call_start_tokens {
        if chunk.contains(start.as_str()) {
            return Ok(true);
        }
    }
    Ok(false)
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -220,3 +230,43 @@ mod tests {
assert_eq!(result.len(), 0);
}
}

#[cfg(test)]
mod detect_parser_tests {
    use super::*;

    /// Builds the parser configuration used by every test in this module:
    /// one start token and one end token, everything else defaulted.
    fn config_under_test() -> JsonParserConfig {
        JsonParserConfig {
            tool_call_start_tokens: vec!["<|tool▁calls▁begin|>".to_string()],
            tool_call_end_tokens: vec!["<|tool▁calls▁end|>".to_string()],
            ..Default::default()
        }
    }

    /// The start token at the very beginning of the chunk is detected.
    #[test]
    fn test_detect_tool_call_start_deepseek_v3_1_chunk_with_tool_call_start_token() {
        let chunk = r#"<|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather宽带}"#;
        let detected = detect_tool_call_start_deepseek_v3_1(chunk, &config_under_test()).unwrap();
        assert!(detected);
    }

    /// A similar-looking token that is not the configured start token is not detected.
    #[test]
    fn test_detect_tool_call_start_deepseek_v3_1_chunk_without_tool_call_start_token() {
        let chunk = r#"<|tool▁call▁begin|>get_current_weather宽带}"#;
        let detected = detect_tool_call_start_deepseek_v3_1(chunk, &config_under_test()).unwrap();
        assert!(!detected);
    }

    /// The start token is detected even when normal text precedes it.
    #[test]
    fn test_detect_tool_call_start_deepseek_v3_1_chunk_with_tool_call_start_token_in_middle() {
        let chunk = r#"The following tool calls retrieve weather information: <|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather宽带}"#;
        let detected = detect_tool_call_start_deepseek_v3_1(chunk, &config_under_test()).unwrap();
        assert!(detected);
    }
}
11 changes: 9 additions & 2 deletions lib/parsers/src/tool_calling/json/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ pub mod base_json_parser;
pub mod deepseek_parser;

pub use super::{config, response};
pub use base_json_parser::try_tool_call_parse_basic_json;
pub use deepseek_parser::parse_tool_calls_deepseek_v3_1;
pub use base_json_parser::{detect_tool_call_start_basic_json, try_tool_call_parse_basic_json};
pub use deepseek_parser::{detect_tool_call_start_deepseek_v3_1, parse_tool_calls_deepseek_v3_1};

pub use super::config::JsonParserConfig;
pub use super::response::ToolCallResponse;
Expand Down Expand Up @@ -34,3 +34,10 @@ pub fn try_tool_call_parse_json(
JsonParserType::DeepseekV31 => parse_tool_calls_deepseek_v3_1(message, config),
}
}

pub fn detect_tool_call_start_json(chunk: &str, config: &JsonParserConfig) -> anyhow::Result<bool> {
match config.parser_type {
JsonParserType::Basic => detect_tool_call_start_basic_json(chunk, config),
JsonParserType::DeepseekV31 => detect_tool_call_start_deepseek_v3_1(chunk, config),
}
}
Loading
Loading